diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 73b2155482e..e045170561d 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -6,6 +6,7 @@ tests/ci/cancel_and_rerun_workflow_lambda/app.py --> ### Changelog category (leave one): - New Feature +- Experimental Feature - Improvement - Performance Improvement - Backward Incompatible Change @@ -48,9 +49,7 @@ At a minimum, the following information should be added (but add more as needed) - [ ] Allow: Stateful tests - [ ] Allow: Integration Tests - [ ] Allow: Performance tests -- [ ] Allow: Normal Builds -- [ ] Allow: Special Builds -- [ ] Allow: All NOT Required Checks +- [ ] Allow: All Builds - [ ] Allow: batch 1, 2 for multi-batch jobs - [ ] Allow: batch 3, 4, 5, 6 for multi-batch jobs --- @@ -61,6 +60,7 @@ At a minimum, the following information should be added (but add more as needed) - [ ] Exclude: All with aarch64, release, debug --- - [ ] Do not test +- [ ] Woolen Wolfdog - [ ] Upload binaries for special builds - [ ] Disable merge-commit - [ ] Disable CI cache diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index c8c6ba30b0b..64c3d2f8342 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -70,7 +70,7 @@ jobs: if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Compatibility check (amd64) + test_name: Compatibility check (release) runner_type: style-checker data: ${{ needs.RunConfig.outputs.data }} CompatibilityCheckAarch64: @@ -159,33 +159,24 @@ jobs: ############################################################################################ ##################################### BUILD REPORTER ####################################### ############################################################################################ - BuilderReport: + Builds_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() }} - needs: - - RunConfig - - BuilderDebAarch64 - - BuilderDebAsan - - BuilderDebDebug - - BuilderDebRelease - - BuilderDebTsan - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickHouse build check - runner_type: style-checker-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - BuilderSpecialReport: - # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() }} - needs: - - RunConfig - - BuilderBinDarwin - - BuilderBinDarwinAarch64 - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickHouse special build check - runner_type: style-checker-aarch64 - data: ${{ needs.RunConfig.outputs.data }} + if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }} + needs: [RunConfig, BuilderDebAarch64, BuilderDebAsan, BuilderDebDebug, BuilderDebRelease, BuilderDebTsan, BuilderBinDarwin, BuilderBinDarwinAarch64] + runs-on: [self-hosted, style-checker-aarch64] + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + - name: Download reports + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --pre --job-name Builds + - name: Builds report + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 ./build_report_check.py --reports package_release package_aarch64 package_asan package_tsan package_debug binary_darwin binary_darwin_aarch64 + - name: Set status + run: | + python3 
"$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --post --job-name Builds ############################################################################################ #################################### INSTALL PACKAGES ###################################### ############################################################################################ @@ -194,7 +185,7 @@ jobs: if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Install packages (amd64) + test_name: Install packages (release) runner_type: style-checker data: ${{ needs.RunConfig.outputs.data }} run_command: | @@ -204,7 +195,7 @@ jobs: if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Install packages (arm64) + test_name: Install packages (aarch64) runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} run_command: | @@ -256,8 +247,7 @@ jobs: FinishCheck: if: ${{ !failure() && !cancelled() }} needs: - - BuilderReport - - BuilderSpecialReport + - Builds_Report - FunctionalStatelessTestAsan - FunctionalStatefulTestDebug - StressTestTsan @@ -273,5 +263,8 @@ jobs: - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" + # update mergeable check + python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + # update overall ci report python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} python3 merge_pr.py diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml new file mode 100644 index 00000000000..3988df3b2b1 --- /dev/null +++ b/.github/workflows/create_release.yml @@ -0,0 +1,29 @@ +name: CreateRelease + +concurrency: + group: release + +'on': + workflow_dispatch: + inputs: + sha: + description: 'The SHA hash of the commit from which to create the release' + required: true + type: string + type: + description: 'The type of release: "new" for a new release or "patch" for a patch release' + required: true + type: choice + options: + - new + - patch + +jobs: + Release: + runs-on: [self-hosted, style-checker-aarch64] + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + - name: Print greeting + run: | + python3 ./tests/ci/release.py --commit ${{ inputs.sha }} --type ${{ inputs.type }} --dry-run diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index f5c78a6b6a1..2a7e6f737ab 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -104,10 +104,9 @@ jobs: with: stage: Tests_2 data: ${{ needs.RunConfig.outputs.data }} - # stage for jobs that do not prohibit merge Tests_3: # Test_3 should not wait for Test_1/Test_2 and should not be blocked by them on master branch since all jobs need to run there. 
- needs: [RunConfig, Builds_1, Builds_2] + needs: [RunConfig, Builds_1] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }} uses: ./.github/workflows/reusable_test_stage.yml with: @@ -115,23 +114,14 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} ################################# Reports ################################# - # Reports should be run even if Builds_1/2 failed - put them separately in wf (not in Tests_1/2) - Builds_1_Report: + # Reports should run even if Builds_1/2 fail - run them separately, not in Tests_1/2/3 + Builds_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }} - needs: [RunConfig, Builds_1] + if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }} + needs: [RunConfig, Builds_1, Builds_2] uses: ./.github/workflows/reusable_test.yml with: - test_name: ClickHouse build check - runner_type: style-checker-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - Builds_2_Report: - # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }} - needs: [RunConfig, Builds_2] - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickHouse special build check + test_name: Builds runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} @@ -165,7 +155,7 @@ jobs: FinishCheck: if: ${{ !cancelled() }} - needs: [RunConfig, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3] + needs: [RunConfig, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3] runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code diff --git a/.github/workflows/merge_queue.yml b/.github/workflows/merge_queue.yml index cfa01b0e8f3..01685ee1f5a 100644 --- a/.github/workflows/merge_queue.yml +++ b/.github/workflows/merge_queue.yml @@ -96,20 +96,15 @@ jobs: stage: Tests_1 data: ${{ needs.RunConfig.outputs.data }} - ################################# Stage Final ################################# - # - FinishCheck: - if: ${{ !cancelled() }} + CheckReadyForMerge: + if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }} + # Test_2 or Test_3 must not have jobs required for Mergeable check needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Tests_1] runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code uses: ClickHouse/checkout@v1 - - name: Check sync status + - name: Check and set merge status run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 sync_pr.py --status - - name: Finish label - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 66ca3381a40..4764e6d3c1a 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -126,16 +126,16 @@ jobs: with: stage: Builds_2 data: ${{ needs.RunConfig.outputs.data }} + # stage for 
running non-required checks without being blocked by required checks (Test_1) if corresponding settings is selected Tests_2: - needs: [RunConfig, Builds_2] + needs: [RunConfig, Builds_1] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }} uses: ./.github/workflows/reusable_test_stage.yml with: stage: Tests_2 data: ${{ needs.RunConfig.outputs.data }} - # stage for jobs that do not prohibit merge Tests_3: - needs: [RunConfig, Builds_1, Tests_1, Builds_2, Tests_2] + needs: [RunConfig, Builds_1, Tests_1] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }} uses: ./.github/workflows/reusable_test_stage.yml with: @@ -143,29 +143,21 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} ################################# Reports ################################# - # Reports should by run even if Builds_1/2 fail, so put them separately in wf (not in Tests_1/2) - Builds_1_Report: + # Reports should run even if Builds_1/2 fail - run them separately (not in Tests_1/2/3) + Builds_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() && needs.StyleCheck.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }} - needs: [RunConfig, StyleCheck, Builds_1] + if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }} + needs: [RunConfig, StyleCheck, Builds_1, Builds_2] uses: ./.github/workflows/reusable_test.yml with: - test_name: ClickHouse build check - runner_type: style-checker-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - Builds_2_Report: - # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() && needs.StyleCheck.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }} - needs: [RunConfig, StyleCheck, Builds_2] - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickHouse special build check + test_name: Builds runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} CheckReadyForMerge: if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }} - needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2] + # Test_2 or Test_3 must not have jobs required for Mergeable check + needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1] runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code @@ -181,7 +173,7 @@ jobs: # FinishCheck: if: ${{ !cancelled() }} - needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3] + needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3] runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code @@ -204,8 +196,7 @@ jobs: concurrency: group: jepsen if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse Keeper Jepsen') }} - # jepsen needs binary_release build which is in Builds_2 - needs: [RunConfig, Builds_2] + needs: [RunConfig, Builds_1] uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse Keeper Jepsen diff --git 
a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index f9b8a4fa764..6bf846d7535 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -65,7 +65,7 @@ jobs: if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Compatibility check (amd64) + test_name: Compatibility check (release) runner_type: style-checker data: ${{ needs.RunConfig.outputs.data }} CompatibilityCheckAarch64: @@ -176,35 +176,24 @@ jobs: ############################################################################################ ##################################### BUILD REPORTER ####################################### ############################################################################################ - BuilderReport: + Builds_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() }} - needs: - - RunConfig - - BuilderDebRelease - - BuilderDebAarch64 - - BuilderDebAsan - - BuilderDebTsan - - BuilderDebUBsan - - BuilderDebMsan - - BuilderDebDebug - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickHouse build check - runner_type: style-checker-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - BuilderSpecialReport: - # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() }} - needs: - - RunConfig - - BuilderBinDarwin - - BuilderBinDarwinAarch64 - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickHouse special build check - runner_type: style-checker-aarch64 - data: ${{ needs.RunConfig.outputs.data }} + if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }} + needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64, BuilderDebAsan, BuilderDebUBsan, BuilderDebMsan, BuilderDebTsan, BuilderDebDebug, BuilderBinDarwin, BuilderBinDarwinAarch64] + runs-on: [self-hosted, style-checker-aarch64] + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + - name: Download reports + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --pre --job-name Builds + - name: Builds report + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 ./build_report_check.py --reports package_release package_aarch64 package_asan package_msan package_ubsan package_tsan package_debug binary_darwin binary_darwin_aarch64 + - name: Set status + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --post --job-name Builds MarkReleaseReady: if: ${{ !failure() && !cancelled() }} needs: @@ -244,7 +233,7 @@ jobs: if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Install packages (amd64) + test_name: Install packages (release) runner_type: style-checker data: ${{ needs.RunConfig.outputs.data }} run_command: | @@ -254,7 +243,7 @@ jobs: if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Install packages (arm64) + test_name: Install packages (aarch64) runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} run_command: | @@ -460,8 +449,7 @@ jobs: needs: - DockerServerImage - DockerKeeperImage - - BuilderReport - - BuilderSpecialReport + - Builds_Report - MarkReleaseReady - FunctionalStatelessTestDebug - FunctionalStatelessTestRelease @@ -496,4 +484,7 @@ jobs: - name: Finish label run: | cd 
"$GITHUB_WORKSPACE/tests/ci" + # update mergeable check + python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + # update overall ci report python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 4891b79e4c7..19258f469c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v24.6, 2024-06-30](#246)**
**[ClickHouse release v24.5, 2024-05-30](#245)**
**[ClickHouse release v24.4, 2024-04-30](#244)**
**[ClickHouse release v24.3 LTS, 2024-03-26](#243)**
@@ -8,6 +9,169 @@ # 2024 Changelog +### ClickHouse release 24.6, 2024-06-30 + +#### Backward Incompatible Change +* Enable asynchronous load of databases and tables by default. See the `async_load_databases` in config.xml. While this change is fully compatible, it can introduce a difference in behavior. When `async_load_databases` is false, as in the previous versions, the server will not accept connections until all tables are loaded. When `async_load_databases` is true, as in the new version, the server can accept connections before all the tables are loaded. If a query is made to a table that is not yet loaded, it will wait for the table's loading, which can take considerable time. It can change the behavior of the server if it is part of a large distributed system under a load balancer. In the first case, the load balancer can get a connection refusal and quickly failover to another server. In the second case, the load balancer can connect to a server that is still loading the tables, and the query will have a higher latency. Moreover, if many queries accumulate in the waiting state, it can lead to a "thundering herd" problem when they start processing simultaneously. This can make a difference only for highly loaded distributed backends. You can set the value of `async_load_databases` to false to avoid this problem. [#57695](https://github.com/ClickHouse/ClickHouse/pull/57695) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Setting `replace_long_file_name_to_hash` is enabled by default for `MergeTree` tables. [#64457](https://github.com/ClickHouse/ClickHouse/pull/64457) ([Anton Popov](https://github.com/CurtizJ)). This setting is fully compatible, and no actions needed during upgrade. The new data format is supported from all versions starting from 23.9. After enabling this setting, you can no longer downgrade to a version 23.8 or older. +* Some invalid queries will fail earlier during parsing. Note: disabled the support for inline KQL expressions (the experimental Kusto language) when they are put into a `kql` table function without a string literal, e.g. `kql(garbage | trash)` instead of `kql('garbage | trash')` or `kql($$garbage | trash$$)`. This feature was introduced unintentionally and should not exist. [#61500](https://github.com/ClickHouse/ClickHouse/pull/61500) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Rework parallel processing in `Ordered` mode of storage `S3Queue`. This PR is backward incompatible for Ordered mode if you used settings `s3queue_processing_threads_num` or `s3queue_total_shards_num`. Setting `s3queue_total_shards_num` is deleted, previously it was allowed to use only under `s3queue_allow_experimental_sharded_mode`, which is now deprecated. A new setting is added - `s3queue_buckets`. [#64349](https://github.com/ClickHouse/ClickHouse/pull/64349) ([Kseniia Sumarokova](https://github.com/kssenii)). +* New functions `snowflakeIDToDateTime`, `snowflakeIDToDateTime64`, `dateTimeToSnowflakeID`, and `dateTime64ToSnowflakeID` were added. Unlike the existing functions `snowflakeToDateTime`, `snowflakeToDateTime64`, `dateTimeToSnowflake`, and `dateTime64ToSnowflake`, the new functions are compatible with function `generateSnowflakeID`, i.e. they accept the snowflake IDs generated by `generateSnowflakeID` and produce snowflake IDs of the same type as `generateSnowflakeID` (i.e. `UInt64`). Furthermore, the new functions default to the UNIX epoch (aka. 1970-01-01), just like `generateSnowflakeID`. If necessary, a different epoch, e.g. 
Twitter's/X's epoch 2010-11-04 aka. 1288834974657 msec since UNIX epoch, can be passed. The old conversion functions are deprecated and will be removed after a transition period: to use them regardless, enable setting `allow_deprecated_snowflake_conversion_functions`. [#64948](https://github.com/ClickHouse/ClickHouse/pull/64948) ([Robert Schulze](https://github.com/rschu1ze)). + +#### New Feature +* Allow to store named collections in ClickHouse Keeper. [#64574](https://github.com/ClickHouse/ClickHouse/pull/64574) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support empty tuples. [#55061](https://github.com/ClickHouse/ClickHouse/pull/55061) ([Amos Bird](https://github.com/amosbird)). +* Add Hilbert Curve encode and decode functions. [#60156](https://github.com/ClickHouse/ClickHouse/pull/60156) ([Artem Mustafin](https://github.com/Artemmm91)). +* Add support for index analysis over `hilbertEncode`. [#64662](https://github.com/ClickHouse/ClickHouse/pull/64662) ([Artem Mustafin](https://github.com/Artemmm91)). +* Added support for reading `LINESTRING` geometry in the WKT format using function `readWKTLineString`. [#62519](https://github.com/ClickHouse/ClickHouse/pull/62519) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Allow to attach parts from a different disk. [#63087](https://github.com/ClickHouse/ClickHouse/pull/63087) ([Unalian](https://github.com/Unalian)). +* Added new SQL functions `generateSnowflakeID` for generating Twitter-style Snowflake IDs. [#63577](https://github.com/ClickHouse/ClickHouse/pull/63577) ([Danila Puzov](https://github.com/kazalika)). +* Added `merge_workload` and `mutation_workload` settings to regulate how resources are utilized and shared between merges, mutations and other workloads. [#64061](https://github.com/ClickHouse/ClickHouse/pull/64061) ([Sergei Trifonov](https://github.com/serxa)). +* Add support for comparing `IPv4` and `IPv6` types using the `=` operator. [#64292](https://github.com/ClickHouse/ClickHouse/pull/64292) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)). +* Support decimal arguments in binary math functions (pow, atan2, max2, min2, hypot). [#64582](https://github.com/ClickHouse/ClickHouse/pull/64582) ([Mikhail Gorshkov](https://github.com/mgorshkov)). +* Added SQL functions `parseReadableSize` (along with `OrNull` and `OrZero` variants). [#64742](https://github.com/ClickHouse/ClickHouse/pull/64742) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)). +* Add server settings `max_table_num_to_throw` and `max_database_num_to_throw` to limit the number of databases or tables on `CREATE` queries. [#64781](https://github.com/ClickHouse/ClickHouse/pull/64781) ([Xu Jia](https://github.com/XuJia0210)). +* Add `_time` virtual column to file alike storages (s3/file/hdfs/url/azureBlobStorage). [#64947](https://github.com/ClickHouse/ClickHouse/pull/64947) ([Ilya Golshtein](https://github.com/ilejn)). +* Introduced new functions `base64URLEncode`, `base64URLDecode` and `tryBase64URLDecode`. [#64991](https://github.com/ClickHouse/ClickHouse/pull/64991) ([Mikhail Gorshkov](https://github.com/mgorshkov)). +* Add new function `editDistanceUTF8`, which calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two UTF8 strings. [#65269](https://github.com/ClickHouse/ClickHouse/pull/65269) ([LiuNeng](https://github.com/liuneng1994)). +* Add `http_response_headers` setting to support custom response headers in custom HTTP handlers. 
[#63562](https://github.com/ClickHouse/ClickHouse/pull/63562) ([Grigorii](https://github.com/GSokol)). +* Added a new table function `loop` to support returning query results in an infinite loop. [#63452](https://github.com/ClickHouse/ClickHouse/pull/63452) ([Sariel](https://github.com/sarielwxm)). This is useful for testing. +* Introduced two additional columns in the `system.query_log`: `used_privileges` and `missing_privileges`. `used_privileges` is populated with the privileges that were checked during query execution, and `missing_privileges` contains required privileges that are missing. [#64597](https://github.com/ClickHouse/ClickHouse/pull/64597) ([Alexey Katsman](https://github.com/alexkats)). +* Added a setting `output_format_pretty_display_footer_column_names` which when enabled displays column names at the end of the table for long tables (50 rows by default), with the threshold value for minimum number of rows controlled by `output_format_pretty_display_footer_column_names_min_rows`. [#65144](https://github.com/ClickHouse/ClickHouse/pull/65144) ([Shaun Struwig](https://github.com/Blargian)). + +#### Experimental Feature +* Introduce statistics of type "number of distinct values". [#59357](https://github.com/ClickHouse/ClickHouse/pull/59357) ([Han Fei](https://github.com/hanfei1991)). +* Support statistics with ReplicatedMergeTree. [#64934](https://github.com/ClickHouse/ClickHouse/pull/64934) ([Han Fei](https://github.com/hanfei1991)). +* If "replica group" is configured for a `Replicated` database, automatically create a cluster that includes replicas from all groups. [#64312](https://github.com/ClickHouse/ClickHouse/pull/64312) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add settings `parallel_replicas_custom_key_range_lower` and `parallel_replicas_custom_key_range_upper` to control how parallel replicas with dynamic shards parallelizes queries when using a range filter. [#64604](https://github.com/ClickHouse/ClickHouse/pull/64604) ([josh-hildred](https://github.com/josh-hildred)). + +#### Performance Improvement +* Add the ability to reshuffle rows during insert to optimize for size without violating the order set by `PRIMARY KEY`. It's controlled by the setting `optimize_row_order` (off by default). [#63578](https://github.com/ClickHouse/ClickHouse/pull/63578) ([Igor Markelov](https://github.com/ElderlyPassionFruit)). +* Add a native parquet reader, which can read parquet binary to ClickHouse Columns directly. It's controlled by the setting `input_format_parquet_use_native_reader` (disabled by default). [#60361](https://github.com/ClickHouse/ClickHouse/pull/60361) ([ZhiHong Zhang](https://github.com/copperybean)). +* Support partial trivial count optimization when the query filter is able to select exact ranges from merge tree tables. [#60463](https://github.com/ClickHouse/ClickHouse/pull/60463) ([Amos Bird](https://github.com/amosbird)). +* Reduce max memory usage of multithreaded `INSERT`s by collecting chunks of multiple threads in a single transform. [#61047](https://github.com/ClickHouse/ClickHouse/pull/61047) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Reduce the memory usage when using Azure object storage by using fixed memory allocation, avoiding the allocation of an extra buffer. [#63160](https://github.com/ClickHouse/ClickHouse/pull/63160) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Reduce the number of virtual function calls in `ColumnNullable::size`. 
[#60556](https://github.com/ClickHouse/ClickHouse/pull/60556) ([HappenLee](https://github.com/HappenLee)). +* Speedup `splitByRegexp` when the regular expression argument is a single-character. [#62696](https://github.com/ClickHouse/ClickHouse/pull/62696) ([Robert Schulze](https://github.com/rschu1ze)). +* Speed up aggregation by 8-bit and 16-bit keys by keeping track of the min and max keys used. This allows to reduce the number of cells that need to be verified. [#62746](https://github.com/ClickHouse/ClickHouse/pull/62746) ([Jiebin Sun](https://github.com/jiebinn)). +* Optimize operator IN when the left hand side is `LowCardinality` and the right is a set of constants. [#64060](https://github.com/ClickHouse/ClickHouse/pull/64060) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Use a thread pool to initialize and destroy hash tables inside `ConcurrentHashJoin`. [#64241](https://github.com/ClickHouse/ClickHouse/pull/64241) ([Nikita Taranov](https://github.com/nickitat)). +* Optimized vertical merges in tables with sparse columns. [#64311](https://github.com/ClickHouse/ClickHouse/pull/64311) ([Anton Popov](https://github.com/CurtizJ)). +* Enabled prefetches of data from remote filesystem during vertical merges. It improves latency of vertical merges in tables with data stored on remote filesystem. [#64314](https://github.com/ClickHouse/ClickHouse/pull/64314) ([Anton Popov](https://github.com/CurtizJ)). +* Reduce redundant calls to `isDefault` of `ColumnSparse::filter` to improve performance. [#64426](https://github.com/ClickHouse/ClickHouse/pull/64426) ([Jiebin Sun](https://github.com/jiebinn)). +* Speedup `find_super_nodes` and `find_big_family` keeper-client commands by making multiple asynchronous getChildren requests. [#64628](https://github.com/ClickHouse/ClickHouse/pull/64628) ([Alexander Gololobov](https://github.com/davenger)). +* Improve function `least`/`greatest` for nullable numberic type arguments. [#64668](https://github.com/ClickHouse/ClickHouse/pull/64668) ([KevinyhZou](https://github.com/KevinyhZou)). +* Allow merging two consequent filtering steps of a query plan. This improves filter-push-down optimization if the filter condition can be pushed down from the parent step. [#64760](https://github.com/ClickHouse/ClickHouse/pull/64760) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove bad optimization in the vertical final implementation and re-enable vertical final algorithm by default. [#64783](https://github.com/ClickHouse/ClickHouse/pull/64783) ([Duc Canh Le](https://github.com/canhld94)). +* Remove ALIAS nodes from the filter expression. This slightly improves performance for queries with `PREWHERE` (with the new analyzer). [#64793](https://github.com/ClickHouse/ClickHouse/pull/64793) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Re-enable OpenSSL session caching. [#65111](https://github.com/ClickHouse/ClickHouse/pull/65111) ([Robert Schulze](https://github.com/rschu1ze)). +* Added settings to disable materialization of skip indexes and statistics on inserts (`materialize_skip_indexes_on_insert` and `materialize_statistics_on_insert`). [#64391](https://github.com/ClickHouse/ClickHouse/pull/64391) ([Anton Popov](https://github.com/CurtizJ)). +* Use the allocated memory size to calculate the row group size and reduce the peak memory of the parquet writer in the single-threaded mode. [#64424](https://github.com/ClickHouse/ClickHouse/pull/64424) ([LiuNeng](https://github.com/liuneng1994)). 
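To make the last entry above concrete, here is a hedged sketch (the path, data, and row count are made up, not taken from the PR) of the single-threaded Parquet export case that the improvement targets:

```sql
-- Sketch only: export Parquet with a single writer thread; with the change above,
-- row groups are sized by allocated memory, which lowers the writer's peak memory.
INSERT INTO FUNCTION file('/tmp/out.parquet', 'Parquet')
SELECT number AS id, toString(number) AS payload
FROM numbers(10000000)
SETTINGS max_threads = 1, max_insert_threads = 1;
```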
+* Improve the iterator of sparse column to reduce call of `size`. [#64497](https://github.com/ClickHouse/ClickHouse/pull/64497) ([Jiebin Sun](https://github.com/jiebinn)). +* Update condition to use server-side copy for backups to Azure blob storage. [#64518](https://github.com/ClickHouse/ClickHouse/pull/64518) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Optimized memory usage of vertical merges for tables with high number of skip indexes. [#64580](https://github.com/ClickHouse/ClickHouse/pull/64580) ([Anton Popov](https://github.com/CurtizJ)). + +#### Improvement +* `SHOW CREATE TABLE` executed on top of system tables will now show the super handy comment unique for each table which will explain why this table is needed. [#63788](https://github.com/ClickHouse/ClickHouse/pull/63788) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* The second argument (scale) of functions `round()`, `roundBankers()`, `floor()`, `ceil()` and `trunc()` can now be non-const. [#64798](https://github.com/ClickHouse/ClickHouse/pull/64798) ([Mikhail Gorshkov](https://github.com/mgorshkov)). +* Hot reload storage policy for `Distributed` tables when adding a new disk. [#58285](https://github.com/ClickHouse/ClickHouse/pull/58285) ([Duc Canh Le](https://github.com/canhld94)). +* Avoid possible deadlock during MergeTree index analysis when scheduling threads in a saturated service. [#59427](https://github.com/ClickHouse/ClickHouse/pull/59427) ([Sean Haynes](https://github.com/seandhaynes)). +* Several minor corner case fixes to S3 proxy support & tunneling. [#63427](https://github.com/ClickHouse/ClickHouse/pull/63427) ([Arthur Passos](https://github.com/arthurpassos)). +* Improve io_uring resubmit visibility. Rename profile event `IOUringSQEsResubmits` -> `IOUringSQEsResubmitsAsync` and add a new one `IOUringSQEsResubmitsSync`. [#63699](https://github.com/ClickHouse/ClickHouse/pull/63699) ([Tomer Shafir](https://github.com/tomershafir)). +* Added a new setting, `metadata_keep_free_space_bytes` to keep free space on the metadata storage disk. [#64128](https://github.com/ClickHouse/ClickHouse/pull/64128) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Add metrics to track the number of directories created and removed by the `plain_rewritable` metadata storage, and the number of entries in the local-to-remote in-memory map. [#64175](https://github.com/ClickHouse/ClickHouse/pull/64175) ([Julia Kartseva](https://github.com/jkartseva)). +* The query cache now considers identical queries with different settings as different. This increases robustness in cases where different settings (e.g. `limit` or `additional_table_filters`) would affect the query result. [#64205](https://github.com/ClickHouse/ClickHouse/pull/64205) ([Robert Schulze](https://github.com/rschu1ze)). +* Support the non standard error code `QpsLimitExceeded` in object storage as a retryable error. [#64225](https://github.com/ClickHouse/ClickHouse/pull/64225) ([Sema Checherinda](https://github.com/CheSema)). +* Forbid converting a MergeTree table to replicated if the zookeeper path for this table already exists. [#64244](https://github.com/ClickHouse/ClickHouse/pull/64244) ([Kirill](https://github.com/kirillgarbar)). +* Added a new setting `input_format_parquet_prefer_block_bytes` to control the average output block bytes, and modified the default value of `input_format_parquet_max_block_size` to 65409. [#64427](https://github.com/ClickHouse/ClickHouse/pull/64427) ([LiuNeng](https://github.com/liuneng1994)). 
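As a rough illustration of the `input_format_parquet_prefer_block_bytes` entry just above (the file name and the value are hypothetical), the setting can be applied per query when reading Parquet:

```sql
-- Sketch: ask the Parquet reader to aim for ~8 MiB output blocks.
SELECT count()
FROM file('data.parquet', 'Parquet')
SETTINGS input_format_parquet_prefer_block_bytes = 8388608;
```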
+* Allow proxy to be bypassed for hosts specified in `no_proxy` env variable and ClickHouse proxy configuration. [#63314](https://github.com/ClickHouse/ClickHouse/pull/63314) ([Arthur Passos](https://github.com/arthurpassos)). +* Always start Keeper with sufficient amount of threads in global thread pool. [#64444](https://github.com/ClickHouse/ClickHouse/pull/64444) ([Duc Canh Le](https://github.com/canhld94)). +* Settings from the user's config don't affect merges and mutations for `MergeTree` on top of object storage. [#64456](https://github.com/ClickHouse/ClickHouse/pull/64456) ([alesapin](https://github.com/alesapin)). +* Support the non standard error code `TotalQpsLimitExceeded` in object storage as a retryable error. [#64520](https://github.com/ClickHouse/ClickHouse/pull/64520) ([Sema Checherinda](https://github.com/CheSema)). +* Updated Advanced Dashboard for both open-source and ClickHouse Cloud versions to include a chart for 'Maximum concurrent network connections'. [#64610](https://github.com/ClickHouse/ClickHouse/pull/64610) ([Thom O'Connor](https://github.com/thomoco)). +* Improve progress report on `zeros_mt` and `generateRandom`. [#64804](https://github.com/ClickHouse/ClickHouse/pull/64804) ([Raúl Marín](https://github.com/Algunenano)). +* Add an asynchronous metric `jemalloc.profile.active` to show whether sampling is currently active. This is an activation mechanism in addition to prof.active; both must be active for the calling thread to sample. [#64842](https://github.com/ClickHouse/ClickHouse/pull/64842) ([Unalian](https://github.com/Unalian)). +* Remove mark of `allow_experimental_join_condition` as important. This mark may have prevented distributed queries in a mixed versions cluster from being executed successfully. [#65008](https://github.com/ClickHouse/ClickHouse/pull/65008) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Added server Asynchronous metrics `DiskGetObjectThrottler*` and `DiskGetObjectThrottler*` reflecting request per second rate limit defined with `s3_max_get_rps` and `s3_max_put_rps` disk settings and currently available number of requests that could be sent without hitting throttling limit on the disk. Metrics are defined for every disk that has a configured limit. [#65050](https://github.com/ClickHouse/ClickHouse/pull/65050) ([Sergei Trifonov](https://github.com/serxa)). +* Initialize global trace collector for `Poco::ThreadPool` (needed for Keeper, etc). [#65239](https://github.com/ClickHouse/ClickHouse/pull/65239) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add a validation when creating a user with `bcrypt_hash`. [#65242](https://github.com/ClickHouse/ClickHouse/pull/65242) ([Raúl Marín](https://github.com/Algunenano)). +* Add profile events for number of rows read during/after `PREWHERE`. [#64198](https://github.com/ClickHouse/ClickHouse/pull/64198) ([Nikita Taranov](https://github.com/nickitat)). +* Print query in `EXPLAIN PLAN` with parallel replicas. [#64298](https://github.com/ClickHouse/ClickHouse/pull/64298) ([vdimir](https://github.com/vdimir)). +* Rename `allow_deprecated_functions` to `allow_deprecated_error_prone_window_functions`. [#64358](https://github.com/ClickHouse/ClickHouse/pull/64358) ([Raúl Marín](https://github.com/Algunenano)). +* Respect `max_read_buffer_size` setting for file descriptors as well in the `file` table function. [#64532](https://github.com/ClickHouse/ClickHouse/pull/64532) ([Azat Khuzhin](https://github.com/azat)). 
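A minimal sketch of the `max_read_buffer_size` entry just above, assuming `clickhouse-local` (the table structure and the buffer size are illustrative):

```sql
-- Sketch: /dev/stdin stands in for reading via a file descriptor;
-- the read buffer size setting is now respected in this case too.
SELECT count()
FROM file('/dev/stdin', 'TSV', 'x String')
SETTINGS max_read_buffer_size = 1048576;
```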
+* Disable transactions for unsupported storages even for materialized views. [#64918](https://github.com/ClickHouse/ClickHouse/pull/64918) ([alesapin](https://github.com/alesapin)). +* Forbid `QUALIFY` clause in the old analyzer. The old analyzer ignored `QUALIFY`, so it could lead to unexpected data removal in mutations. [#65356](https://github.com/ClickHouse/ClickHouse/pull/65356) ([Dmitry Novik](https://github.com/novikd)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* A bug in Apache ORC library was fixed: Fixed ORC statistics calculation, when writing, for unsigned types on all platforms and Int8 on ARM. [#64563](https://github.com/ClickHouse/ClickHouse/pull/64563) ([Michael Kolupaev](https://github.com/al13n321)). +* Returned back the behaviour of how ClickHouse works and interprets Tuples in CSV format. This change effectively reverts https://github.com/ClickHouse/ClickHouse/pull/60994 and makes it available only under a few settings: `output_format_csv_serialize_tuple_into_separate_columns`, `input_format_csv_deserialize_separate_columns_into_tuple` and `input_format_csv_try_infer_strings_from_quoted_tuples`. [#65170](https://github.com/ClickHouse/ClickHouse/pull/65170) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix a permission error where a user in a specific situation can escalate their privileges on the default database without necessary grants. [#64769](https://github.com/ClickHouse/ClickHouse/pull/64769) ([pufit](https://github.com/pufit)). +* Fix crash with UniqInjectiveFunctionsEliminationPass and uniqCombined. [#65188](https://github.com/ClickHouse/ClickHouse/pull/65188) ([Raúl Marín](https://github.com/Algunenano)). +* Fix a bug in ClickHouse Keeper that causes digest mismatch during closing session. [#65198](https://github.com/ClickHouse/ClickHouse/pull/65198) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Use correct memory alignment for Distinct combinator. Previously, crash could happen because of invalid memory allocation when the combinator was used. [#65379](https://github.com/ClickHouse/ClickHouse/pull/65379) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix crash with `DISTINCT` and window functions. [#64767](https://github.com/ClickHouse/ClickHouse/pull/64767) ([Igor Nikonov](https://github.com/devcrafter)). +* Fixed 'set' skip index not working with IN and indexHint(). [#62083](https://github.com/ClickHouse/ClickHouse/pull/62083) ([Michael Kolupaev](https://github.com/al13n321)). +* Support executing function during assignment of parameterized view value. [#63502](https://github.com/ClickHouse/ClickHouse/pull/63502) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fixed parquet memory tracking. [#63584](https://github.com/ClickHouse/ClickHouse/pull/63584) ([Michael Kolupaev](https://github.com/al13n321)). +* Fixed reading of columns of type `Tuple(Map(LowCardinality(String), String), ...)`. [#63956](https://github.com/ClickHouse/ClickHouse/pull/63956) ([Anton Popov](https://github.com/CurtizJ)). +* Fix an `Cyclic aliases` error for cyclic aliases of different type (expression and function). [#63993](https://github.com/ClickHouse/ClickHouse/pull/63993) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* This fix will use a proper redefined context with the correct definer for each individual view in the query pipeline. [#64079](https://github.com/ClickHouse/ClickHouse/pull/64079) ([pufit](https://github.com/pufit)). 
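A hedged sketch of the kind of query the definer fix just above applies to (the user and underlying view names are illustrative, not from the PR):

```sql
-- Sketch: a view with an explicit definer; with the fix, each view in the
-- query pipeline runs under its own definer's grants.
CREATE VIEW v_outer DEFINER = alice SQL SECURITY DEFINER AS
SELECT * FROM v_inner;
```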
+* Fix analyzer: "Not found column" error is fixed when using INTERPOLATE. [#64096](https://github.com/ClickHouse/ClickHouse/pull/64096) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix creating backups to S3 buckets with different credentials from the disk containing the file. [#64153](https://github.com/ClickHouse/ClickHouse/pull/64153) ([Antonio Andelic](https://github.com/antonio2368)). +* The query cache now considers two identical queries against different databases as different. The previous behavior could be used to bypass missing privileges to read from a table. [#64199](https://github.com/ClickHouse/ClickHouse/pull/64199) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix possible abort on uncaught exception in ~WriteBufferFromFileDescriptor in StatusFile. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix `duplicate alias` error for distributed queries with `ARRAY JOIN`. [#64226](https://github.com/ClickHouse/ClickHouse/pull/64226) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix unexpected accurateCast from string to integer. [#64255](https://github.com/ClickHouse/ClickHouse/pull/64255) ([wudidapaopao](https://github.com/wudidapaopao)). +* Fixed CNF simplification, in case any OR group contains mutually exclusive atoms. [#64256](https://github.com/ClickHouse/ClickHouse/pull/64256) ([Eduard Karacharov](https://github.com/korowa)). +* Fix Query Tree size validation. [#64377](https://github.com/ClickHouse/ClickHouse/pull/64377) ([Dmitry Novik](https://github.com/novikd)). +* Fix `Logical error: Bad cast` for `Buffer` table with `PREWHERE`. [#64388](https://github.com/ClickHouse/ClickHouse/pull/64388) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Prevent recursive logging in `blob_storage_log` when it's stored on object storage. [#64393](https://github.com/ClickHouse/ClickHouse/pull/64393) ([vdimir](https://github.com/vdimir)). +* Fixed `CREATE TABLE AS` queries for tables with default expressions. [#64455](https://github.com/ClickHouse/ClickHouse/pull/64455) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed `optimize_read_in_order` behaviour for ORDER BY ... NULLS FIRST / LAST on tables with nullable keys. [#64483](https://github.com/ClickHouse/ClickHouse/pull/64483) ([Eduard Karacharov](https://github.com/korowa)). +* Fix the `Expression nodes list expected 1 projection names` and `Unknown expression or identifier` errors for queries with aliases to `GLOBAL IN.`. [#64517](https://github.com/ClickHouse/ClickHouse/pull/64517) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix an error `Cannot find column` in distributed queries with constant CTE in the `GROUP BY` key. [#64519](https://github.com/ClickHouse/ClickHouse/pull/64519) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix the crash loop when restoring from backup is blocked by creating an MV with a definer that hasn't been restored yet. [#64595](https://github.com/ClickHouse/ClickHouse/pull/64595) ([pufit](https://github.com/pufit)). +* Fix the output of function `formatDateTimeInJodaSyntax` when a formatter generates an uneven number of characters and the last character is `0`. For example, `SELECT formatDateTimeInJodaSyntax(toDate('2012-05-29'), 'D')` now correctly returns `150` instead of previously `15`. [#64614](https://github.com/ClickHouse/ClickHouse/pull/64614) ([LiuNeng](https://github.com/liuneng1994)). +* Do not rewrite aggregation if `-If` combinator is already used. 
[#64638](https://github.com/ClickHouse/ClickHouse/pull/64638) ([Dmitry Novik](https://github.com/novikd)). +* Fix type inference for float (in case of small buffer, i.e. `--max_read_buffer_size 1`). [#64641](https://github.com/ClickHouse/ClickHouse/pull/64641) ([Azat Khuzhin](https://github.com/azat)). +* Fix bug which could lead to non-working TTLs with expressions. [#64694](https://github.com/ClickHouse/ClickHouse/pull/64694) ([alesapin](https://github.com/alesapin)). +* Fix removing the `WHERE` and `PREWHERE` expressions, which are always true (for the new analyzer). [#64695](https://github.com/ClickHouse/ClickHouse/pull/64695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed excessive part elimination by token-based text indexes (`ngrambf` , `full_text`) when filtering by result of `startsWith`, `endsWith`, `match`, `multiSearchAny`. [#64720](https://github.com/ClickHouse/ClickHouse/pull/64720) ([Eduard Karacharov](https://github.com/korowa)). +* Fixes incorrect behaviour of ANSI CSI escaping in the `UTF8::computeWidth` function. [#64756](https://github.com/ClickHouse/ClickHouse/pull/64756) ([Shaun Struwig](https://github.com/Blargian)). +* Fix a case of incorrect removal of `ORDER BY` / `LIMIT BY` across subqueries. [#64766](https://github.com/ClickHouse/ClickHouse/pull/64766) ([Raúl Marín](https://github.com/Algunenano)). +* Fix (experimental) unequal join with subqueries for sets which are in the mixed join conditions. [#64775](https://github.com/ClickHouse/ClickHouse/pull/64775) ([lgbo](https://github.com/lgbo-ustc)). +* Fix crash in a local cache over `plain_rewritable` disk. [#64778](https://github.com/ClickHouse/ClickHouse/pull/64778) ([Julia Kartseva](https://github.com/jkartseva)). +* Keeper fix: return correct value for `zk_latest_snapshot_size` in `mntr` command. [#64784](https://github.com/ClickHouse/ClickHouse/pull/64784) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix `Cannot find column` in distributed query with `ARRAY JOIN` by `Nested` column. Fixes [#64755](https://github.com/ClickHouse/ClickHouse/issues/64755). [#64801](https://github.com/ClickHouse/ClickHouse/pull/64801) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix memory leak in slru cache policy. [#64803](https://github.com/ClickHouse/ClickHouse/pull/64803) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed possible incorrect memory tracking in several kinds of queries: queries that read any data from S3, queries via http protocol, asynchronous inserts. [#64844](https://github.com/ClickHouse/ClickHouse/pull/64844) ([Anton Popov](https://github.com/CurtizJ)). +* Fix the `Block structure mismatch` error for queries reading with `PREWHERE` from the materialized view when the materialized view has columns of different types than the source table. Fixes [#64611](https://github.com/ClickHouse/ClickHouse/issues/64611). [#64855](https://github.com/ClickHouse/ClickHouse/pull/64855) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix rare crash when table has TTL with subquery + database replicated + parallel replicas + analyzer. It's really rare, but please don't use TTLs with subqueries. [#64858](https://github.com/ClickHouse/ClickHouse/pull/64858) ([alesapin](https://github.com/alesapin)). +* Fix duplicating `Delete` events in `blob_storage_log` in case of large batch to delete. [#64924](https://github.com/ClickHouse/ClickHouse/pull/64924) ([vdimir](https://github.com/vdimir)). 
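One quick way to observe what the `blob_storage_log` fix just above affects (the column names are an assumption from memory, not taken from the PR):

```sql
-- Sketch: count events per type in the blob storage log; after the fix,
-- a large delete batch should no longer produce duplicated 'Delete' rows.
SELECT event_type, count() AS events
FROM system.blob_storage_log
GROUP BY event_type
ORDER BY events DESC;
```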
+* Fixed `Session moved to another server` error from [Zoo]Keeper that might happen after server startup when the config has includes from [Zoo]Keeper. [#64986](https://github.com/ClickHouse/ClickHouse/pull/64986) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `ALTER MODIFY COMMENT` query that was broken for parameterized VIEWs in https://github.com/ClickHouse/ClickHouse/pull/54211. [#65031](https://github.com/ClickHouse/ClickHouse/pull/65031) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix `host_id` in DatabaseReplicated when `cluster_secure_connection` parameter is enabled. Previously all the connections within the cluster created by DatabaseReplicated were not secure, even if the parameter was enabled. [#65054](https://github.com/ClickHouse/ClickHouse/pull/65054) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Avoid writing to finalized buffer in File-like storages. [#65063](https://github.com/ClickHouse/ClickHouse/pull/65063) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible infinite query duration in case of cyclic aliases. Fixes [#64849](https://github.com/ClickHouse/ClickHouse/issues/64849). [#65081](https://github.com/ClickHouse/ClickHouse/pull/65081) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix the `Unknown expression identifier` error for remote queries with `INTERPOLATE (alias)` (new analyzer). Fixes [#64636](https://github.com/ClickHouse/ClickHouse/issues/64636). [#65090](https://github.com/ClickHouse/ClickHouse/pull/65090) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix pushing arithmetic operations out of aggregation. In the new analyzer, optimization was applied only once. [#65104](https://github.com/ClickHouse/ClickHouse/pull/65104) ([Dmitry Novik](https://github.com/novikd)). +* Fix aggregate function name rewriting in the new analyzer. [#65110](https://github.com/ClickHouse/ClickHouse/pull/65110) ([Dmitry Novik](https://github.com/novikd)). +* Respond with 5xx instead of 200 OK in case of receive timeout while reading (parts of) the request body from the client socket. [#65118](https://github.com/ClickHouse/ClickHouse/pull/65118) ([Julian Maicher](https://github.com/jmaicher)). +* Fix possible crash for hedged requests. [#65206](https://github.com/ClickHouse/ClickHouse/pull/65206) ([Azat Khuzhin](https://github.com/azat)). +* Fix the bug in Hashed and Hashed_Array dictionary short circuit evaluation, which may read uninitialized number, leading to various errors. [#65256](https://github.com/ClickHouse/ClickHouse/pull/65256) ([jsc0218](https://github.com/jsc0218)). +* This PR ensures that the type of the constant(IN operator's second parameter) is always visible during the IN operator's type conversion process. Otherwise, losing type information may cause some conversions to fail, such as the conversion from DateTime to Date. This fixes ([#64487](https://github.com/ClickHouse/ClickHouse/issues/64487)). [#65315](https://github.com/ClickHouse/ClickHouse/pull/65315) ([pn](https://github.com/chloro-pn)). + +#### Build/Testing/Packaging Improvement +* Add support for LLVM XRay. [#64592](https://github.com/ClickHouse/ClickHouse/pull/64592) [#64837](https://github.com/ClickHouse/ClickHouse/pull/64837) ([Tomer Shafir](https://github.com/tomershafir)). 
+* Unite s3/hdfs/azure storage implementations into a single class working with IObjectStorage. Same for *Cluster, data lakes and Queue storages. [#59767](https://github.com/ClickHouse/ClickHouse/pull/59767) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Refactor data part writer to remove dependencies on MergeTreeData and DataPart. [#63620](https://github.com/ClickHouse/ClickHouse/pull/63620) ([Alexander Gololobov](https://github.com/davenger)). +* Refactor `KeyCondition` and key analysis to improve PartitionPruner and trivial count optimization. This is separated from [#60463](https://github.com/ClickHouse/ClickHouse/issues/60463) . [#61459](https://github.com/ClickHouse/ClickHouse/pull/61459) ([Amos Bird](https://github.com/amosbird)). +* Introduce assertions to verify all functions are called with columns of the right size. [#63723](https://github.com/ClickHouse/ClickHouse/pull/63723) ([Raúl Marín](https://github.com/Algunenano)). +* Make `network` service be required when using the `rc` init script to start the ClickHouse server daemon. [#60650](https://github.com/ClickHouse/ClickHouse/pull/60650) ([Chun-Sheng, Li](https://github.com/peter279k)). +* Reduce the size of some slow tests. [#64387](https://github.com/ClickHouse/ClickHouse/pull/64387) [#64452](https://github.com/ClickHouse/ClickHouse/pull/64452) ([Raúl Marín](https://github.com/Algunenano)). +* Replay ZooKeeper logs using keeper-bench. [#62481](https://github.com/ClickHouse/ClickHouse/pull/62481) ([Antonio Andelic](https://github.com/antonio2368)). + ### ClickHouse release 24.5, 2024-05-30 #### Backward Incompatible Change diff --git a/README.md b/README.md index 73d989210b5..dc253d4db2d 100644 --- a/README.md +++ b/README.md @@ -34,20 +34,18 @@ curl https://clickhouse.com/ | sh Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know. -* [v24.5 Community Call](https://clickhouse.com/company/events/v24-5-community-release-call) - May 30 +* [v24.6 Community Call](https://clickhouse.com/company/events/v24-6-community-release-call) - Jul 2 ## Upcoming Events Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc. 
-* [ClickHouse Happy Hour @ Tom's Watch Bar - Los Angeles](https://www.meetup.com/clickhouse-los-angeles-user-group/events/300740584/) - May 22 -* [ClickHouse & Confluent Meetup in Dubai](https://www.meetup.com/clickhouse-dubai-meetup-group/events/299629189/) - May 28 -* [ClickHouse Meetup in Stockholm](https://www.meetup.com/clickhouse-stockholm-user-group/events/299752651/) - Jun 3 -* [ClickHouse Meetup @ Cloudflare - San Francisco](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/300523061/) - Jun 4 -* [ClickHouse (クリックハウス) Meetup Tokyo](https://www.meetup.com/clickhouse-tokyo-user-group/events/300798053/) - Jun 5 +* [AWS Summit in DC](https://clickhouse.com/company/events/2024-06-aws-summit-dc) - Jun 26 * [ClickHouse Meetup in Amsterdam](https://www.meetup.com/clickhouse-netherlands-user-group/events/300781068/) - Jun 27 * [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/300783448/) - Jul 9 +* [ClickHouse Cloud - Live Update Call](https://clickhouse.com/company/events/202407-cloud-update-live) - Jul 9 * [ClickHouse Meetup @ Ramp - New York City](https://www.meetup.com/clickhouse-new-york-user-group/events/300595845/) - Jul 9 +* [AWS Summit in New York](https://clickhouse.com/company/events/2024-07-awssummit-nyc) - Jul 10 * [ClickHouse Meetup @ Klaviyo - Boston](https://www.meetup.com/clickhouse-boston-user-group/events/300907870) - Jul 11 ## Recent Recordings diff --git a/base/base/getFQDNOrHostName.cpp b/base/base/getFQDNOrHostName.cpp index 2a4ba8e2e11..6b3da9699b9 100644 --- a/base/base/getFQDNOrHostName.cpp +++ b/base/base/getFQDNOrHostName.cpp @@ -6,6 +6,9 @@ namespace { std::string getFQDNOrHostNameImpl() { +#if defined(OS_DARWIN) + return Poco::Net::DNS::hostName(); +#else try { return Poco::Net::DNS::thisHost().name(); @@ -14,6 +17,7 @@ namespace { return Poco::Net::DNS::hostName(); } +#endif } } diff --git a/base/poco/Crypto/src/OpenSSLInitializer.cpp b/base/poco/Crypto/src/OpenSSLInitializer.cpp index 23447760b47..31798e8dd7e 100644 --- a/base/poco/Crypto/src/OpenSSLInitializer.cpp +++ b/base/poco/Crypto/src/OpenSSLInitializer.cpp @@ -23,9 +23,6 @@ #include #endif -#if __has_feature(address_sanitizer) -#include -#endif using Poco::RandomInputStream; using Poco::Thread; @@ -70,18 +67,12 @@ void OpenSSLInitializer::initialize() SSL_library_init(); SSL_load_error_strings(); OpenSSL_add_all_algorithms(); - + char seed[SEEDSIZE]; RandomInputStream rnd; rnd.read(seed, sizeof(seed)); - { -# if __has_feature(address_sanitizer) - /// Leak sanitizer (part of address sanitizer) thinks that a few bytes of memory in OpenSSL are allocated during but never released. 
- __lsan::ScopedDisabler lsan_disabler; -#endif - RAND_seed(seed, SEEDSIZE); - } - + RAND_seed(seed, SEEDSIZE); + int nMutexes = CRYPTO_num_locks(); _mutexes = new Poco::FastMutex[nMutexes]; CRYPTO_set_locking_callback(&OpenSSLInitializer::lock); @@ -89,8 +80,8 @@ void OpenSSLInitializer::initialize() // https://sourceforge.net/p/poco/bugs/110/ // // From http://www.openssl.org/docs/crypto/threads.html : -// "If the application does not register such a callback using CRYPTO_THREADID_set_callback(), -// then a default implementation is used - on Windows and BeOS this uses the system's +// "If the application does not register such a callback using CRYPTO_THREADID_set_callback(), +// then a default implementation is used - on Windows and BeOS this uses the system's // default thread identifying APIs" CRYPTO_set_id_callback(&OpenSSLInitializer::id); CRYPTO_set_dynlock_create_callback(&OpenSSLInitializer::dynlockCreate); @@ -109,7 +100,7 @@ void OpenSSLInitializer::uninitialize() CRYPTO_set_locking_callback(0); CRYPTO_set_id_callback(0); delete [] _mutexes; - + CONF_modules_free(); } } diff --git a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h index e4037c87927..25dc133fb20 100644 --- a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h +++ b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h @@ -17,6 +17,7 @@ #ifndef NetSSL_SSLManager_INCLUDED #define NetSSL_SSLManager_INCLUDED +#include #include #include "Poco/BasicEvent.h" @@ -219,6 +220,13 @@ namespace Net /// Unless initializeClient() has been called, the first call to this method initializes the default Context /// from the application configuration. + Context::Ptr getCustomServerContext(const std::string & name); + /// Return custom Context used by the server. + + Context::Ptr setCustomServerContext(const std::string & name, Context::Ptr ctx); + /// Set custom Context used by the server. + /// Return pointer on inserted Context or on old Context if exists. + PrivateKeyPassphraseHandlerPtr serverPassphraseHandler(); /// Returns the configured passphrase handler of the server. If none is set, the method will create a default one /// from an application configuration. 
@@ -258,6 +266,40 @@ namespace Net static const std::string CFG_SERVER_PREFIX; static const std::string CFG_CLIENT_PREFIX; + static const std::string CFG_PRIV_KEY_FILE; + static const std::string CFG_CERTIFICATE_FILE; + static const std::string CFG_CA_LOCATION; + static const std::string CFG_VER_MODE; + static const Context::VerificationMode VAL_VER_MODE; + static const std::string CFG_VER_DEPTH; + static const int VAL_VER_DEPTH; + static const std::string CFG_ENABLE_DEFAULT_CA; + static const bool VAL_ENABLE_DEFAULT_CA; + static const std::string CFG_CIPHER_LIST; + static const std::string CFG_CYPHER_LIST; // for backwards compatibility + static const std::string VAL_CIPHER_LIST; + static const std::string CFG_PREFER_SERVER_CIPHERS; + static const std::string CFG_DELEGATE_HANDLER; + static const std::string VAL_DELEGATE_HANDLER; + static const std::string CFG_CERTIFICATE_HANDLER; + static const std::string VAL_CERTIFICATE_HANDLER; + static const std::string CFG_CACHE_SESSIONS; + static const std::string CFG_SESSION_ID_CONTEXT; + static const std::string CFG_SESSION_CACHE_SIZE; + static const std::string CFG_SESSION_TIMEOUT; + static const std::string CFG_EXTENDED_VERIFICATION; + static const std::string CFG_REQUIRE_TLSV1; + static const std::string CFG_REQUIRE_TLSV1_1; + static const std::string CFG_REQUIRE_TLSV1_2; + static const std::string CFG_DISABLE_PROTOCOLS; + static const std::string CFG_DH_PARAMS_FILE; + static const std::string CFG_ECDH_CURVE; + +#ifdef OPENSSL_FIPS + static const std::string CFG_FIPS_MODE; + static const bool VAL_FIPS_MODE; +#endif + protected: static int verifyClientCallback(int ok, X509_STORE_CTX * pStore); /// The return value of this method defines how errors in @@ -314,39 +356,7 @@ namespace Net InvalidCertificateHandlerPtr _ptrClientCertificateHandler; Poco::FastMutex _mutex; - static const std::string CFG_PRIV_KEY_FILE; - static const std::string CFG_CERTIFICATE_FILE; - static const std::string CFG_CA_LOCATION; - static const std::string CFG_VER_MODE; - static const Context::VerificationMode VAL_VER_MODE; - static const std::string CFG_VER_DEPTH; - static const int VAL_VER_DEPTH; - static const std::string CFG_ENABLE_DEFAULT_CA; - static const bool VAL_ENABLE_DEFAULT_CA; - static const std::string CFG_CIPHER_LIST; - static const std::string CFG_CYPHER_LIST; // for backwards compatibility - static const std::string VAL_CIPHER_LIST; - static const std::string CFG_PREFER_SERVER_CIPHERS; - static const std::string CFG_DELEGATE_HANDLER; - static const std::string VAL_DELEGATE_HANDLER; - static const std::string CFG_CERTIFICATE_HANDLER; - static const std::string VAL_CERTIFICATE_HANDLER; - static const std::string CFG_CACHE_SESSIONS; - static const std::string CFG_SESSION_ID_CONTEXT; - static const std::string CFG_SESSION_CACHE_SIZE; - static const std::string CFG_SESSION_TIMEOUT; - static const std::string CFG_EXTENDED_VERIFICATION; - static const std::string CFG_REQUIRE_TLSV1; - static const std::string CFG_REQUIRE_TLSV1_1; - static const std::string CFG_REQUIRE_TLSV1_2; - static const std::string CFG_DISABLE_PROTOCOLS; - static const std::string CFG_DH_PARAMS_FILE; - static const std::string CFG_ECDH_CURVE; - -#ifdef OPENSSL_FIPS - static const std::string CFG_FIPS_MODE; - static const bool VAL_FIPS_MODE; -#endif + std::unordered_map _mapPtrServerContexts; friend class Poco::SingletonHolder; friend class Context; diff --git a/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp b/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp index 7f6cc9abcb2..ae04a994786 100644 --- 
a/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp +++ b/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp @@ -330,27 +330,26 @@ void SSLManager::initDefaultContext(bool server) else _ptrDefaultClientContext->disableProtocols(disabledProtocols); - /// Temporarily disabled during the transition from boringssl to OpenSSL due to tsan issues. - /// bool cacheSessions = config.getBool(prefix + CFG_CACHE_SESSIONS, false); - /// if (server) - /// { - /// std::string sessionIdContext = config.getString(prefix + CFG_SESSION_ID_CONTEXT, config.getString("application.name", "")); - /// _ptrDefaultServerContext->enableSessionCache(cacheSessions, sessionIdContext); - /// if (config.hasProperty(prefix + CFG_SESSION_CACHE_SIZE)) - /// { - /// int cacheSize = config.getInt(prefix + CFG_SESSION_CACHE_SIZE); - /// _ptrDefaultServerContext->setSessionCacheSize(cacheSize); - /// } - /// if (config.hasProperty(prefix + CFG_SESSION_TIMEOUT)) - /// { - /// int timeout = config.getInt(prefix + CFG_SESSION_TIMEOUT); - /// _ptrDefaultServerContext->setSessionTimeout(timeout); - /// } - /// } - /// else - /// { - /// _ptrDefaultClientContext->enableSessionCache(cacheSessions); - /// } + bool cacheSessions = config.getBool(prefix + CFG_CACHE_SESSIONS, false); + if (server) + { + std::string sessionIdContext = config.getString(prefix + CFG_SESSION_ID_CONTEXT, config.getString("application.name", "")); + _ptrDefaultServerContext->enableSessionCache(cacheSessions, sessionIdContext); + if (config.hasProperty(prefix + CFG_SESSION_CACHE_SIZE)) + { + int cacheSize = config.getInt(prefix + CFG_SESSION_CACHE_SIZE); + _ptrDefaultServerContext->setSessionCacheSize(cacheSize); + } + if (config.hasProperty(prefix + CFG_SESSION_TIMEOUT)) + { + int timeout = config.getInt(prefix + CFG_SESSION_TIMEOUT); + _ptrDefaultServerContext->setSessionTimeout(timeout); + } + } + else + { + _ptrDefaultClientContext->enableSessionCache(cacheSessions); + } bool extendedVerification = config.getBool(prefix + CFG_EXTENDED_VERIFICATION, false); if (server) _ptrDefaultServerContext->enableExtendedCertificateVerification(extendedVerification); @@ -429,6 +428,23 @@ void SSLManager::initCertificateHandler(bool server) } +Context::Ptr SSLManager::getCustomServerContext(const std::string & name) +{ + Poco::FastMutex::ScopedLock lock(_mutex); + auto it = _mapPtrServerContexts.find(name); + if (it != _mapPtrServerContexts.end()) + return it->second; + return nullptr; +} + +Context::Ptr SSLManager::setCustomServerContext(const std::string & name, Context::Ptr ctx) +{ + Poco::FastMutex::ScopedLock lock(_mutex); + ctx = _mapPtrServerContexts.insert({name, ctx}).first->second; + return ctx; +} + + Poco::Util::AbstractConfiguration& SSLManager::appConfig() { try diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index dfbbb66a1e9..bb776fa9506 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -1,12 +1,12 @@ # This variables autochanged by tests/ci/version_helper.py: -# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, +# NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
-SET(VERSION_REVISION 54487) +SET(VERSION_REVISION 54488) SET(VERSION_MAJOR 24) -SET(VERSION_MINOR 6) +SET(VERSION_MINOR 7) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 70a1d3a63d47f0be077d67b8deb907230fc7cfb0) -SET(VERSION_DESCRIBE v24.6.1.1-testing) -SET(VERSION_STRING 24.6.1.1) +SET(VERSION_GITHASH aa023477a9265e403982fca5ee29a714db5133d9) +SET(VERSION_DESCRIBE v24.7.1.1-testing) +SET(VERSION_STRING 24.7.1.1) # end of autochange diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index b633f0fda50..023fdcf103a 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -34,7 +34,7 @@ if (OS_LINUX) # avoid spurious latencies and additional work associated with # MADV_DONTNEED. See # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. - set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true") else() set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") endif() diff --git a/contrib/openssl b/contrib/openssl index 67c0b63e578..5d81fa7068f 160000 --- a/contrib/openssl +++ b/contrib/openssl @@ -1 +1 @@ -Subproject commit 67c0b63e578e4c751ac9edf490f5a96124fff8dc +Subproject commit 5d81fa7068fc8c07f4d0997d5b703f3c541a637c diff --git a/contrib/re2 b/contrib/re2 index a807e8a3aac..85dd7ad833a 160000 --- a/contrib/re2 +++ b/contrib/re2 @@ -1 +1 @@ -Subproject commit a807e8a3aac2cc33c77b7071efea54fcabe38e0c +Subproject commit 85dd7ad833a73095ecf3e3baea608ba051bbe2c7 diff --git a/contrib/re2-cmake/CMakeLists.txt b/contrib/re2-cmake/CMakeLists.txt index f773bc65a69..99d61839b30 100644 --- a/contrib/re2-cmake/CMakeLists.txt +++ b/contrib/re2-cmake/CMakeLists.txt @@ -28,16 +28,20 @@ set(RE2_SOURCES add_library(_re2 ${RE2_SOURCES}) target_include_directories(_re2 PUBLIC "${SRC_DIR}") target_link_libraries(_re2 PRIVATE + absl::absl_check + absl::absl_log absl::base absl::core_headers absl::fixed_array + absl::flags absl::flat_hash_map absl::flat_hash_set + absl::hash absl::inlined_vector - absl::strings - absl::str_format - absl::synchronization absl::optional - absl::span) + absl::span + absl::str_format + absl::strings + absl::synchronization) add_library(ch_contrib::re2 ALIAS _re2) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index f94621ba092..3ce489b9e0e 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -254,7 +254,7 @@ function run_tests() set +e clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ - --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ + --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt set -e @@ -285,7 +285,7 @@ stop_logs_replication # Try to get logs while server is running failed_to_save_logs=0 -for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log +for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log error_log do err=$(clickhouse-client -q "select * from system.$table into outfile '/test_output/$table.tsv.gz' format TSVWithNamesAndTypes") echo "$err" @@ -339,7 +339,7 @@ if [ $failed_to_save_logs -ne 0 ]; then # directly # - even though ci 
auto-compress some files (but not *.tsv) it does this only # for files >64MB, we want this files to be compressed explicitly - for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log + for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log error_log do clickhouse-local "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then @@ -379,6 +379,10 @@ fi tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: +rm -rf /var/lib/clickhouse/data/system/*/ +tar -chf /test_output/store.tar /var/lib/clickhouse/store ||: +tar -chf /test_output/metadata.tar /var/lib/clickhouse/metadata/*.sql ||: + if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then rg -Fa "" /var/log/clickhouse-server/clickhouse-server1.log ||: rg -Fa "" /var/log/clickhouse-server/clickhouse-server2.log ||: diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 6ad03852b66..7cd712b73f6 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -37,6 +37,7 @@ RUN pip3 install \ tqdm==4.66.4 \ types-requests \ unidiff \ + jwt \ && rm -rf /root/.cache/pip RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8 diff --git a/docs/changelogs/v24.4.3.25-stable.md b/docs/changelogs/v24.4.3.25-stable.md new file mode 100644 index 00000000000..9582753c731 --- /dev/null +++ b/docs/changelogs/v24.4.3.25-stable.md @@ -0,0 +1,30 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.4.3.25-stable (a915dd4eda4) FIXME as compared to v24.4.2.141-stable (9e23d27bd11) + +#### Build/Testing/Packaging Improvement +* Backported in [#65130](https://github.com/ClickHouse/ClickHouse/issues/65130): Decrease the `unit-test` image a few times. [#65102](https://github.com/ClickHouse/ClickHouse/pull/65102) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Backported in [#64982](https://github.com/ClickHouse/ClickHouse/issues/64982): Fix the `Block structure mismatch` error for queries reading with `PREWHERE` from the materialized view when the materialized view has columns of different types than the source table. Fixes [#64611](https://github.com/ClickHouse/ClickHouse/issues/64611). [#64855](https://github.com/ClickHouse/ClickHouse/pull/64855) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#64974](https://github.com/ClickHouse/ClickHouse/issues/64974): Fix rare crash when table has TTL with subquery + database replicated + parallel replicas + analyzer. It's really rare, but please don't use TTLs with subqueries. [#64858](https://github.com/ClickHouse/ClickHouse/pull/64858) ([alesapin](https://github.com/alesapin)). +* Backported in [#65072](https://github.com/ClickHouse/ClickHouse/issues/65072): Fix `ALTER MODIFY COMMENT` query that was broken for parameterized VIEWs in https://github.com/ClickHouse/ClickHouse/pull/54211. [#65031](https://github.com/ClickHouse/ClickHouse/pull/65031) ([Nikolay Degterinsky](https://github.com/evillique)). 
+* Backported in [#65177](https://github.com/ClickHouse/ClickHouse/issues/65177): Fix the `Unknown expression identifier` error for remote queries with `INTERPOLATE (alias)` (new analyzer). Fixes [#64636](https://github.com/ClickHouse/ClickHouse/issues/64636). [#65090](https://github.com/ClickHouse/ClickHouse/pull/65090) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#65263](https://github.com/ClickHouse/ClickHouse/issues/65263): Fix the bug in Hashed and Hashed_Array dictionary short circuit evaluation, which may read uninitialized number, leading to various errors. [#65256](https://github.com/ClickHouse/ClickHouse/pull/65256) ([jsc0218](https://github.com/jsc0218)). + +#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC) + +* Backported in [#65285](https://github.com/ClickHouse/ClickHouse/issues/65285): Fix crash with UniqInjectiveFunctionsEliminationPass and uniqCombined. [#65188](https://github.com/ClickHouse/ClickHouse/pull/65188) ([Raúl Marín](https://github.com/Algunenano)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#65114](https://github.com/ClickHouse/ClickHouse/issues/65114): Adjust the `version_helper` and script to a new release scheme. [#64759](https://github.com/ClickHouse/ClickHouse/pull/64759) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#65225](https://github.com/ClickHouse/ClickHouse/issues/65225): Capture weak_ptr of ContextAccess for safety. [#65051](https://github.com/ClickHouse/ClickHouse/pull/65051) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#65217](https://github.com/ClickHouse/ClickHouse/issues/65217): Fix false positives leaky memory warnings in OpenSSL. [#65125](https://github.com/ClickHouse/ClickHouse/pull/65125) ([Robert Schulze](https://github.com/rschu1ze)). + diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index ec5760541e8..0a1fe58b16f 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -267,7 +267,7 @@ A pull request can be created even if the work is not completed yet. In this cas Testing will commence as soon as ClickHouse employees label your PR with a tag “can be tested”. The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within half an hour. And the main set of tests will report itself within an hour. -The system will prepare ClickHouse binary builds for your pull request individually. To retrieve these builds click the “Details” link next to “ClickHouse build check” entry in the list of checks. There you will find direct links to the built .deb packages of ClickHouse which you can deploy even on your production servers (if you have no fear). +The system will prepare ClickHouse binary builds for your pull request individually. To retrieve these builds click the “Details” link next to “Builds” entry in the list of checks. There you will find direct links to the built .deb packages of ClickHouse which you can deploy even on your production servers (if you have no fear). Most probably some of the builds will fail at first times. This is due to the fact that we check builds both with gcc as well as with clang, with almost all of existing warnings (always with the `-Werror` flag) enabled for clang. On that same page, you can find all of the build logs so that you do not have to build ClickHouse in all of the possible ways. 
diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 8dff6f0ed1d..269995a1a96 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -28,7 +28,7 @@ run, for example, the test `01428_hash_set_nan_key`, change to the repository folder and run the following command: ``` -PATH=$PATH: tests/clickhouse-test 01428_hash_set_nan_key +PATH=:$PATH tests/clickhouse-test 01428_hash_set_nan_key ``` Test results (`stderr` and `stdout`) are written to files `01428_hash_set_nan_key.[stderr|stdout]` which diff --git a/docs/en/engines/table-engines/integrations/azureBlobStorage.md b/docs/en/engines/table-engines/integrations/azureBlobStorage.md index dfc27d6b8cf..bdf96832e9d 100644 --- a/docs/en/engines/table-engines/integrations/azureBlobStorage.md +++ b/docs/en/engines/table-engines/integrations/azureBlobStorage.md @@ -56,6 +56,15 @@ SELECT * FROM test_table; - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`. - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. +## Authentication + +Currently there are 3 ways to authenticate: +- `Managed Identity` - Can be used by providing an `endpoint`, `connection_string` or `storage_account_url`. +- `SAS Token` - Can be used by providing an `endpoint`, `connection_string` or `storage_account_url`. It is identified by presence of '?' in the url. +- `Workload Identity` - Can be used by providing an `endpoint` or `storage_account_url`. If `use_workload_identity` parameter is set in config, ([workload identity](https://github.com/Azure/azure-sdk-for-cpp/tree/main/sdk/identity/azure-identity#authenticate-azure-hosted-applications)) is used for authentication. + + + ## See also [Azure Blob Storage Table Function](/docs/en/sql-reference/table-functions/azureBlobStorage) diff --git a/docs/en/engines/table-engines/integrations/iceberg.md b/docs/en/engines/table-engines/integrations/iceberg.md index 9d6395f73ac..21fdbc0b1a5 100644 --- a/docs/en/engines/table-engines/integrations/iceberg.md +++ b/docs/en/engines/table-engines/integrations/iceberg.md @@ -37,7 +37,7 @@ Using named collections: http://test.s3.amazonaws.com/clickhouse-bucket/ - test + test test diff --git a/docs/en/engines/table-engines/integrations/s3queue.md b/docs/en/engines/table-engines/integrations/s3queue.md index 0958680dc56..11181703645 100644 --- a/docs/en/engines/table-engines/integrations/s3queue.md +++ b/docs/en/engines/table-engines/integrations/s3queue.md @@ -13,7 +13,7 @@ This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ec CREATE TABLE s3_queue_engine_table (name String, value UInt32) ENGINE = S3Queue(path, [NOSIGN, | aws_access_key_id, aws_secret_access_key,] format, [compression]) [SETTINGS] - [mode = 'unordered',] + [mode = '',] [after_processing = 'keep',] [keeper_path = '',] [s3queue_loading_retries = 0,] @@ -28,6 +28,8 @@ CREATE TABLE s3_queue_engine_table (name String, value UInt32) [s3queue_cleanup_interval_max_ms = 30000,] ``` +Starting with `24.7` settings without `s3queue_` prefix are also supported. + **Engine parameters** - `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path). 
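As an illustration of the `24.7` note above about unprefixed settings, here is a minimal sketch of an `S3Queue` table written with the new setting names. The bucket URL, ZooKeeper path, and setting values are placeholders invented for this example and are not taken from the PR:

```sql
-- Hypothetical illustration: the same engine as documented above, with the
-- setting names written without the `s3queue_` prefix (supported since 24.7).
CREATE TABLE s3_queue_example (name String, value UInt32)
ENGINE = S3Queue('https://example-bucket.s3.amazonaws.com/incoming/*.csv', NOSIGN, 'CSV')
SETTINGS
    mode = 'unordered',
    after_processing = 'keep',
    keeper_path = '/clickhouse/s3queue/example',
    loading_retries = 0;   -- previously spelled s3queue_loading_retries
```

Per the note above, the `s3queue_`-prefixed spellings should continue to work, so existing table definitions do not need to be rewritten.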
diff --git a/docs/en/getting-started/example-datasets/images/stackoverflow.png b/docs/en/getting-started/example-datasets/images/stackoverflow.png new file mode 100644 index 00000000000..f31acdc8cc3 Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/stackoverflow.png differ diff --git a/docs/en/getting-started/example-datasets/stackoverflow.md b/docs/en/getting-started/example-datasets/stackoverflow.md new file mode 100644 index 00000000000..e982a3c3dfc --- /dev/null +++ b/docs/en/getting-started/example-datasets/stackoverflow.md @@ -0,0 +1,394 @@ +--- +slug: /en/getting-started/example-datasets/stackoverflow +sidebar_label: Stack Overflow +sidebar_position: 1 +description: Analyzing Stack Overflow data with ClickHouse +--- + +# Analyzing Stack Overflow data with ClickHouse + +This dataset contains every `Post`, `User`, `Vote`, `Comment`, `Badge, `PostHistory`, and `PostLink` that has occurred on Stack Overflow. + +Users can either download pre-prepared Parquet versions of the data, containing every post up to April 2024, or download the latest data in XML format and load this. Stack Overflow provide updates to this data periodically - historically every 3 months. + +The following diagram shows the schema for the available tables assuming Parquet format. + +![Stack Overflow schema](./images/stackoverflow.png) + +A description of the schema of this data can be found [here](https://meta.stackexchange.com/questions/2677/database-schema-documentation-for-the-public-data-dump-and-sede). + +## Pre-prepared data + +We provide a copy of this data in Parquet format, up to date as of April 2024. While small for ClickHouse with respect to the number of rows (60 million posts), this dataset contains significant volumes of text and large String columns. + +```sql +CREATE DATABASE stackoverflow +``` + +The following timings are for a 96 GiB, 24 vCPU ClickHouse Cloud cluster located in `eu-west-2`. The dataset is located in `eu-west-3`. + +### Posts + +```sql +CREATE TABLE stackoverflow.posts +( + `Id` Int32 CODEC(Delta(4), ZSTD(1)), + `PostTypeId` Enum8('Question' = 1, 'Answer' = 2, 'Wiki' = 3, 'TagWikiExcerpt' = 4, 'TagWiki' = 5, 'ModeratorNomination' = 6, 'WikiPlaceholder' = 7, 'PrivilegeWiki' = 8), + `AcceptedAnswerId` UInt32, + `CreationDate` DateTime64(3, 'UTC'), + `Score` Int32, + `ViewCount` UInt32 CODEC(Delta(4), ZSTD(1)), + `Body` String, + `OwnerUserId` Int32, + `OwnerDisplayName` String, + `LastEditorUserId` Int32, + `LastEditorDisplayName` String, + `LastEditDate` DateTime64(3, 'UTC') CODEC(Delta(8), ZSTD(1)), + `LastActivityDate` DateTime64(3, 'UTC'), + `Title` String, + `Tags` String, + `AnswerCount` UInt16 CODEC(Delta(2), ZSTD(1)), + `CommentCount` UInt8, + `FavoriteCount` UInt8, + `ContentLicense` LowCardinality(String), + `ParentId` String, + `CommunityOwnedDate` DateTime64(3, 'UTC'), + `ClosedDate` DateTime64(3, 'UTC') +) +ENGINE = MergeTree +PARTITION BY toYear(CreationDate) +ORDER BY (PostTypeId, toDate(CreationDate), CreationDate) + +INSERT INTO stackoverflow.posts SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/posts/*.parquet') + +0 rows in set. Elapsed: 265.466 sec. Processed 59.82 million rows, 38.07 GB (225.34 thousand rows/s., 143.42 MB/s.) +``` + +Posts are also available by year e.g. 
[https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/posts/2020.parquet](https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/posts/2020.parquet) + + +### Votes + +```sql +CREATE TABLE stackoverflow.votes +( + `Id` UInt32, + `PostId` Int32, + `VoteTypeId` UInt8, + `CreationDate` DateTime64(3, 'UTC'), + `UserId` Int32, + `BountyAmount` UInt8 +) +ENGINE = MergeTree +ORDER BY (VoteTypeId, CreationDate, PostId, UserId) + +INSERT INTO stackoverflow.votes SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/votes/*.parquet') + +0 rows in set. Elapsed: 21.605 sec. Processed 238.98 million rows, 2.13 GB (11.06 million rows/s., 98.46 MB/s.) +``` + +Votes are also available by year e.g. [https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/posts/2020.parquet](https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/votes/2020.parquet) + + +### Comments + +```sql +CREATE TABLE stackoverflow.comments +( + `Id` UInt32, + `PostId` UInt32, + `Score` UInt16, + `Text` String, + `CreationDate` DateTime64(3, 'UTC'), + `UserId` Int32, + `UserDisplayName` LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY CreationDate + +INSERT INTO stackoverflow.comments SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/comments/*.parquet') + +0 rows in set. Elapsed: 56.593 sec. Processed 90.38 million rows, 11.14 GB (1.60 million rows/s., 196.78 MB/s.) +``` + +Comments are also available by year e.g. [https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/posts/2020.parquet](https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/comments/2020.parquet) + +### Users + +```sql +CREATE TABLE stackoverflow.users +( + `Id` Int32, + `Reputation` LowCardinality(String), + `CreationDate` DateTime64(3, 'UTC') CODEC(Delta(8), ZSTD(1)), + `DisplayName` String, + `LastAccessDate` DateTime64(3, 'UTC'), + `AboutMe` String, + `Views` UInt32, + `UpVotes` UInt32, + `DownVotes` UInt32, + `WebsiteUrl` String, + `Location` LowCardinality(String), + `AccountId` Int32 +) +ENGINE = MergeTree +ORDER BY (Id, CreationDate) + +INSERT INTO stackoverflow.users SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/users.parquet') + +0 rows in set. Elapsed: 10.988 sec. Processed 22.48 million rows, 1.36 GB (2.05 million rows/s., 124.10 MB/s.) +``` + +### Badges + +```sql +CREATE TABLE stackoverflow.badges +( + `Id` UInt32, + `UserId` Int32, + `Name` LowCardinality(String), + `Date` DateTime64(3, 'UTC'), + `Class` Enum8('Gold' = 1, 'Silver' = 2, 'Bronze' = 3), + `TagBased` Bool +) +ENGINE = MergeTree +ORDER BY UserId + +INSERT INTO stackoverflow.badges SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/badges.parquet') + +0 rows in set. Elapsed: 6.635 sec. Processed 51.29 million rows, 797.05 MB (7.73 million rows/s., 120.13 MB/s.) +``` + +### `PostLinks` + +```sql +CREATE TABLE stackoverflow.postlinks +( + `Id` UInt64, + `CreationDate` DateTime64(3, 'UTC'), + `PostId` Int32, + `RelatedPostId` Int32, + `LinkTypeId` Enum8('Linked' = 1, 'Duplicate' = 3) +) +ENGINE = MergeTree +ORDER BY (PostId, RelatedPostId) + +INSERT INTO stackoverflow.postlinks SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/postlinks.parquet') + +0 rows in set. Elapsed: 1.534 sec. 
Processed 6.55 million rows, 129.70 MB (4.27 million rows/s., 84.57 MB/s.) +``` + +### `PostHistory` + +```sql +CREATE TABLE stackoverflow.posthistory +( + `Id` UInt64, + `PostHistoryTypeId` UInt8, + `PostId` Int32, + `RevisionGUID` String, + `CreationDate` DateTime64(3, 'UTC'), + `UserId` Int32, + `Text` String, + `ContentLicense` LowCardinality(String), + `Comment` String, + `UserDisplayName` String +) +ENGINE = MergeTree +ORDER BY (CreationDate, PostId) + +INSERT INTO stackoverflow.posthistory SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/posthistory/*.parquet') + +0 rows in set. Elapsed: 422.795 sec. Processed 160.79 million rows, 67.08 GB (380.30 thousand rows/s., 158.67 MB/s.) +``` + +## Original dataset + +The original dataset is available in compressed (7zip) XML format at [https://archive.org/download/stackexchange](https://archive.org/download/stackexchange) - files with prefix `stackoverflow.com*`. + +### Download + +```bash +wget https://archive.org/download/stackexchange/stackoverflow.com-Badges.7z +wget https://archive.org/download/stackexchange/stackoverflow.com-Comments.7z +wget https://archive.org/download/stackexchange/stackoverflow.com-PostHistory.7z +wget https://archive.org/download/stackexchange/stackoverflow.com-PostLinks.7z +wget https://archive.org/download/stackexchange/stackoverflow.com-Posts.7z +wget https://archive.org/download/stackexchange/stackoverflow.com-Users.7z +wget https://archive.org/download/stackexchange/stackoverflow.com-Votes.7z +``` + +These files are up to 35GB and can take around 30 mins to download depending on internet connection - the download server throttles at around 20MB/sec. + +### Convert to JSON + +At the time of writing, ClickHouse does not have native support for XML as an input format. To load the data into ClickHouse we first convert to NDJSON. + +To convert XML to JSON we recommend the [`xq`](https://github.com/kislyuk/yq) linux tool, a simple `jq` wrapper for XML documents. + +Install xq and jq: + +```bash +sudo apt install jq +pip install yq +``` + +The following steps apply to any of the above files. We use the `stackoverflow.com-Posts.7z` file as an example. Modify as required. + +Extract the file using [p7zip](https://p7zip.sourceforge.net/). This will produce a single xml file - in this case `Posts.xml`. + +> Files are compressed approximately 4.5x. At 22GB compressed, the posts file requires around 97G uncompressed. + +```bash +p7zip -d stackoverflow.com-Posts.7z +``` + +The following splits the xml file into files, each containing 10000 rows. + +```bash +mkdir posts +cd posts +# the following splits the input xml file into sub files of 10000 rows +tail +3 ../Posts.xml | head -n -1 | split -l 10000 --filter='{ printf "\n"; cat - ; printf "\n"; } > $FILE' - +``` + +After running the above users will have a set of files, each with 10000 lines. This ensures the memory overhead of the next command is not excessive (xml to JSON conversion is done in memory). + +```bash +find . -maxdepth 1 -type f -exec xq -c '.rows.row[]' {} \; | sed -e 's:"@:":g' > posts_v2.json +``` + +The above command will produce a single `posts.json` file. + +Load into ClickHouse with the following command. Note the schema is specified for the `posts.json` file. This will need to be adjusted per data type to align with the target table. 
+ +```bash +clickhouse local --query "SELECT * FROM file('posts.json', JSONEachRow, 'Id Int32, PostTypeId UInt8, AcceptedAnswerId UInt32, CreationDate DateTime64(3, \'UTC\'), Score Int32, ViewCount UInt32, Body String, OwnerUserId Int32, OwnerDisplayName String, LastEditorUserId Int32, LastEditorDisplayName String, LastEditDate DateTime64(3, \'UTC\'), LastActivityDate DateTime64(3, \'UTC\'), Title String, Tags String, AnswerCount UInt16, CommentCount UInt8, FavoriteCount UInt8, ContentLicense String, ParentId String, CommunityOwnedDate DateTime64(3, \'UTC\'), ClosedDate DateTime64(3, \'UTC\')') FORMAT Native" | clickhouse client --host --secure --password --query "INSERT INTO stackoverflow.posts_v2 FORMAT Native" +``` + +## Example queries + +A few simple questions to you get started. + +### Most popular tags on Stack Overflow + +```sql + +SELECT + arrayJoin(arrayFilter(t -> (t != ''), splitByChar('|', Tags))) AS Tags, + count() AS c +FROM stackoverflow.posts +GROUP BY Tags +ORDER BY c DESC +LIMIT 10 + +┌─Tags───────┬───────c─┐ +│ javascript │ 2527130 │ +│ python │ 2189638 │ +│ java │ 1916156 │ +│ c# │ 1614236 │ +│ php │ 1463901 │ +│ android │ 1416442 │ +│ html │ 1186567 │ +│ jquery │ 1034621 │ +│ c++ │ 806202 │ +│ css │ 803755 │ +└────────────┴─────────┘ + +10 rows in set. Elapsed: 1.013 sec. Processed 59.82 million rows, 1.21 GB (59.07 million rows/s., 1.19 GB/s.) +Peak memory usage: 224.03 MiB. +``` + +### User with the most answers (active accounts) + +Account requires a `UserId`. + +```sql +SELECT + any(OwnerUserId) UserId, + OwnerDisplayName, + count() AS c +FROM stackoverflow.posts WHERE OwnerDisplayName != '' AND PostTypeId='Answer' AND OwnerUserId != 0 +GROUP BY OwnerDisplayName +ORDER BY c DESC +LIMIT 5 + +┌─UserId─┬─OwnerDisplayName─┬────c─┐ +│ 22656 │ Jon Skeet │ 2727 │ +│ 23354 │ Marc Gravell │ 2150 │ +│ 12950 │ tvanfosson │ 1530 │ +│ 3043 │ Joel Coehoorn │ 1438 │ +│ 10661 │ S.Lott │ 1087 │ +└────────┴──────────────────┴──────┘ + +5 rows in set. Elapsed: 0.154 sec. Processed 35.83 million rows, 193.39 MB (232.33 million rows/s., 1.25 GB/s.) +Peak memory usage: 206.45 MiB. +``` + +### ClickHouse related posts with the most views + +```sql +SELECT + Id, + Title, + ViewCount, + AnswerCount +FROM stackoverflow.posts +WHERE Title ILIKE '%ClickHouse%' +ORDER BY ViewCount DESC +LIMIT 10 + +┌───────Id─┬─Title────────────────────────────────────────────────────────────────────────────┬─ViewCount─┬─AnswerCount─┐ +│ 52355143 │ Is it possible to delete old records from clickhouse table? │ 41462 │ 3 │ +│ 37954203 │ Clickhouse Data Import │ 38735 │ 3 │ +│ 37901642 │ Updating data in Clickhouse │ 36236 │ 6 │ +│ 58422110 │ Pandas: How to insert dataframe into Clickhouse │ 29731 │ 4 │ +│ 63621318 │ DBeaver - Clickhouse - SQL Error [159] .. Read timed out │ 27350 │ 1 │ +│ 47591813 │ How to filter clickhouse table by array column contents? │ 27078 │ 2 │ +│ 58728436 │ How to search the string in query with case insensitive on Clickhouse database? │ 26567 │ 3 │ +│ 65316905 │ Clickhouse: DB::Exception: Memory limit (for query) exceeded │ 24899 │ 2 │ +│ 49944865 │ How to add a column in clickhouse │ 24424 │ 1 │ +│ 59712399 │ How to cast date Strings to DateTime format with extended parsing in ClickHouse? │ 22620 │ 1 │ +└──────────┴──────────────────────────────────────────────────────────────────────────────────┴───────────┴─────────────┘ + +10 rows in set. Elapsed: 0.472 sec. Processed 59.82 million rows, 1.91 GB (126.63 million rows/s., 4.03 GB/s.) +Peak memory usage: 240.01 MiB. 
+``` + +### Most controversial posts + +```sql +SELECT + Id, + Title, + UpVotes, + DownVotes, + abs(UpVotes - DownVotes) AS Controversial_ratio +FROM stackoverflow.posts +INNER JOIN +( + SELECT + PostId, + countIf(VoteTypeId = 2) AS UpVotes, + countIf(VoteTypeId = 3) AS DownVotes + FROM stackoverflow.votes + GROUP BY PostId + HAVING (UpVotes > 10) AND (DownVotes > 10) +) AS votes ON posts.Id = votes.PostId +WHERE Title != '' +ORDER BY Controversial_ratio ASC +LIMIT 3 + +┌───────Id─┬─Title─────────────────────────────────────────────┬─UpVotes─┬─DownVotes─┬─Controversial_ratio─┐ +│ 583177 │ VB.NET Infinite For Loop │ 12 │ 12 │ 0 │ +│ 9756797 │ Read console input as enumerable - one statement? │ 16 │ 16 │ 0 │ +│ 13329132 │ What's the point of ARGV in Ruby? │ 22 │ 22 │ 0 │ +└──────────┴───────────────────────────────────────────────────┴─────────┴───────────┴─────────────────────┘ + +3 rows in set. Elapsed: 4.779 sec. Processed 298.80 million rows, 3.16 GB (62.52 million rows/s., 661.05 MB/s.) +Peak memory usage: 6.05 GiB. +``` + +## Attribution + +We thank Stack Overflow for providing this data under the `cc-by-sa 4.0` license, acknowledging their efforts and the original source of the data at [https://archive.org/details/stackexchange](https://archive.org/details/stackexchange). diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 67752f223ce..98e73dec451 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -314,7 +314,7 @@ For example, to download a aarch64 binary for ClickHouse v23.4, follow these ste - Find the GitHub pull request for release v23.4: [Release pull request for branch 23.4](https://github.com/ClickHouse/ClickHouse/pull/49238) - Click "Commits", then click a commit similar to "Update autogenerated version to 23.4.2.1 and contributors" for the particular version you like to install. - Click the green check / yellow dot / red cross to open the list of CI checks. -- Click "Details" next to "ClickHouse Build Check" in the list, it will open a page similar to [this page](https://s3.amazonaws.com/clickhouse-test-reports/46793/b460eb70bf29b19eadd19a1f959b15d186705394/clickhouse_build_check/report.html) +- Click "Details" next to "Builds" in the list, it will open a page similar to [this page](https://s3.amazonaws.com/clickhouse-test-reports/46793/b460eb70bf29b19eadd19a1f959b15d186705394/clickhouse_build_check/report.html) - Find the rows with compiler = "clang-*-aarch64" - there are multiple rows. - Download the artifacts for these builds. diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 1eb426af617..e18ff6f1a3f 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -193,6 +193,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va - `--hardware-utilization` — Print hardware utilization information in progress bar. - `--print-profile-events` – Print `ProfileEvents` packets. - `--profile-events-delay-ms` – Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet). +- `--jwt` – If specified, enables authorization via JSON Web Token. Server JWT authorization is available only in ClickHouse Cloud. Instead of `--host`, `--port`, `--user` and `--password` options, ClickHouse client also supports connection strings (see next section). 
diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index ffdd7e2ca25..a81a17e65d6 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -2169,6 +2169,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t - [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `lz4`. - [input_format_parquet_max_block_size](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_max_block_size) - Max block row size for parquet reader. Default value - `65409`. - [input_format_parquet_prefer_block_bytes](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_prefer_block_bytes) - Average block bytes output by parquet reader. Default value - `16744704`. +- [output_format_parquet_write_page_index](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_max_block_size) - Add a possibility to write page index into parquet files. Need to disable `output_format_parquet_use_custom_encoder` at present. Default value - `true`. ## ParquetMetadata {data-format-parquet-metadata} diff --git a/docs/en/interfaces/mysql.md b/docs/en/interfaces/mysql.md index ce5ab24ecb0..42820505406 100644 --- a/docs/en/interfaces/mysql.md +++ b/docs/en/interfaces/mysql.md @@ -31,6 +31,56 @@ Alternatively, in order to enable the MySQL interface for an existing service: 3. After entering the password, you will get prompted the MySQL connection string for this service ![Connection screen - MySQL Enabled](./images/mysql5.png) +## Creating multiple MySQL users in ClickHouse Cloud + +By default, there is a built-in `mysql4` user, which uses the same password as the `default` one. The `` part is the first segment of your ClickHouse Cloud hostname. This format is necessary to work with the tools that implement secure connection, but don't provide [SNI information in their TLS handshake](https://www.cloudflare.com/learning/ssl/what-is-sni), which makes it impossible to do the internal routing without an extra hint in the username (MySQL console client is one of such tools). + +Because of this, we _highly recommend_ following the `mysql4_` format when creating a new user intended to be used with the MySQL interface, where `` is a hint to identify your Cloud service, and `` is an arbitrary suffix of your choice. + +:::tip +For ClickHouse Cloud hostname like `foobar.us-east1.aws.clickhouse.cloud`, the `` part equals to `foobar`, and a custom MySQL username could look like `mysql4foobar_team1`. +::: + +You can create extra users to use with the MySQL interface if, for example, you need to apply extra settings. + +1. Optional - create a [settings profile](https://clickhouse.com/docs/en/sql-reference/statements/create/settings-profile) to apply for your custom user. For example, `my_custom_profile` with an extra setting which will be applied by default when we connect with the user we create later: + + ```sql + CREATE SETTINGS PROFILE my_custom_profile SETTINGS prefer_column_name_to_alias=1; + ``` + + `prefer_column_name_to_alias` is used just as an example, you can use other settings there. +2. [Create a user](https://clickhouse.com/docs/en/sql-reference/statements/create/user) using the following format: `mysql4_` ([see above](#creating-multiple-mysql-users-in-clickhouse-cloud)). The password must be in double SHA1 format. 
For example: + + ```sql + CREATE USER mysql4foobar_team1 IDENTIFIED WITH double_sha1_password BY 'YourPassword42$'; + ``` + + or if you want to use a custom profile for this user: + + ```sql + CREATE USER mysql4foobar_team1 IDENTIFIED WITH double_sha1_password BY 'YourPassword42$' SETTINGS PROFILE 'my_custom_profile'; + ``` + + where `my_custom_profile` is the name of the profile you created earlier. +3. [Grant](https://clickhouse.com/docs/en/sql-reference/statements/grant) the new user the necessary permissions to interact with the desired tables or databases. For example, if you want to grant access to `system.query_log` only: + + ```sql + GRANT SELECT ON system.query_log TO mysql4foobar_team1; + ``` + +4. Use the created user to connect to your ClickHouse Cloud service with the MySQL interface. + +### Troubleshooting multiple MySQL users in ClickHouse Cloud + +If you created a new MySQL user, and you see the following error while connecting via MySQL CLI client: + +``` +ERROR 2013 (HY000): Lost connection to MySQL server at 'reading authorization packet', system error: 54 +``` + +In this case, ensure that the username follows the `mysql4_` format, as described ([above](#creating-multiple-mysql-users-in-clickhouse-cloud)). + ## Enabling the MySQL Interface On Self-managed ClickHouse Add the [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d/` [folder](../operations/configuration-files): diff --git a/docs/en/operations/allocation-profiling.md b/docs/en/operations/allocation-profiling.md index 64b4106a7e1..574e1ae2ff3 100644 --- a/docs/en/operations/allocation-profiling.md +++ b/docs/en/operations/allocation-profiling.md @@ -59,10 +59,10 @@ For that, we need to use `jemalloc`'s tool called [jeprof](https://github.com/je If that’s the case, we recommend installing an [alternative implementation](https://github.com/gimli-rs/addr2line) of the tool. ``` -git clone https://github.com/gimli-rs/addr2line +git clone https://github.com/gimli-rs/addr2line.git --depth=1 --branch=0.23.0 cd addr2line -cargo b --examples -r -cp ./target/release/examples/addr2line path/to/current/addr2line +cargo build --features bin --release +cp ./target/release/addr2line path/to/current/addr2line ``` ::: diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md index 91438cfb675..59ee05d1f9e 100644 --- a/docs/en/operations/named-collections.md +++ b/docs/en/operations/named-collections.md @@ -5,6 +5,10 @@ sidebar_label: "Named collections" title: "Named collections" --- +import CloudNotSupportedBadge from '@theme/badges/CloudNotSupportedBadge'; + + + Named collections provide a way to store collections of key-value pairs to be used to configure integrations with external sources. You can use named collections with dictionaries, tables, table functions, and object storage. diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index f50dae0f1a2..8278f8c8699 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -498,6 +498,8 @@ Default: 0.9 Interval in seconds during which the server's maximum allowed memory consumption is adjusted by the corresponding threshold in cgroups. 
(see settings `cgroup_memory_watcher_hard_limit_ratio` and `cgroup_memory_watcher_soft_limit_ratio`). +To disable the cgroup observer, set this value to `0`. + Type: UInt64 Default: 15 @@ -591,6 +593,22 @@ Default value: 100000 400 ``` +## max\_table\_num\_to\_throw {#max-table-num-to-throw} +If number of tables is greater than this value, server will throw an exception. 0 means no limitation. View, remote tables, dictionary, system tables are not counted. Only count table in Atomic/Ordinary/Replicated/Lazy database engine.Default value: 0 + +**Example** +```xml +400 +``` + +## max\_database\_num\_to\_throw {#max-table-num-to-throw} +If number of _database is greater than this value, server will throw an exception. 0 means no limitation. +Default value: 0 + +**Example** +```xml +400 +``` ## max_temporary_data_on_disk_size @@ -938,6 +956,38 @@ Or it can be set in hex: Everything mentioned above can be applied for `aes_256_gcm_siv` (but the key must be 32 bytes long). +## error_log {#error_log} + +It is disabled by default. + +**Enabling** + +To manually turn on error history collection [`system.error_log`](../../operations/system-tables/error_log.md), create `/etc/clickhouse-server/config.d/error_log.xml` with the following content: + +``` xml + + + system + error_log
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        <collect_interval_milliseconds>1000</collect_interval_milliseconds>
+        <max_size_rows>1048576</max_size_rows>
+        <reserved_size_rows>8192</reserved_size_rows>
+        <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+        <flush_on_crash>false</flush_on_crash>
+    </error_log>
+</clickhouse>
+``` + +**Disabling** + +To disable `error_log` setting, you should create the following file `/etc/clickhouse-server/config.d/disable_error_log.xml` with the following content: + +``` xml + + + +``` ## custom_settings_prefixes {#custom_settings_prefixes} @@ -1415,6 +1465,9 @@ Keys: - `size` – Size of the file. Applies to `log` and `errorlog`. Once the file reaches `size`, ClickHouse archives and renames it, and creates a new log file in its place. - `count` – The number of archived log files that ClickHouse stores. - `console` – Send `log` and `errorlog` to the console instead of file. To enable, set to `1` or `true`. +- `console_log_level` – Logging level for console. Default to `level`. +- `use_syslog` - Log to syslog as well. +- `syslog_level` - Logging level for logging to syslog. - `stream_compress` – Compress `log` and `errorlog` with `lz4` stream compression. To enable, set to `1` or `true`. - `formatting` – Specify log format to be printed in console log (currently only `json` supported). @@ -1901,7 +1954,7 @@ For more information, see the MergeTreeSettings.h header file. ## metric_log {#metric_log} -It is enabled by default. If it`s not, you can do this manually. +It is disabled by default. **Enabling** @@ -3084,3 +3137,21 @@ This setting is only necessary for the migration period and will become obsolete Type: Bool Default: 1 + +## merge_workload {#merge_workload} + +Used to regulate how resources are utilized and shared between merges and other workloads. Specified value is used as `workload` setting value for all background merges. Can be overridden by a merge tree setting. + +Default value: "default" + +**See Also** +- [Workload Scheduling](/docs/en/operations/workload-scheduling.md) + +## mutation_workload {#mutation_workload} + +Used to regulate how resources are utilized and shared between mutations and other workloads. Specified value is used as `workload` setting value for all background mutations. Can be overridden by a merge tree setting. + +Default value: "default" + +**See Also** +- [Workload Scheduling](/docs/en/operations/workload-scheduling.md) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index b45dc290797..9879ee35612 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -974,6 +974,24 @@ Default value: false - [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting +## merge_workload + +Used to regulate how resources are utilized and shared between merges and other workloads. Specified value is used as `workload` setting value for background merges of this table. If not specified (empty string), then server setting `merge_workload` is used instead. + +Default value: an empty string + +**See Also** +- [Workload Scheduling](/docs/en/operations/workload-scheduling.md) + +## mutation_workload + +Used to regulate how resources are utilized and shared between mutations and other workloads. Specified value is used as `workload` setting value for background mutations of this table. If not specified (empty string), then server setting `mutation_workload` is used instead. + +Default value: an empty string + +**See Also** +- [Workload Scheduling](/docs/en/operations/workload-scheduling.md) + ### optimize_row_order Controls if the row order should be optimized during inserts to improve the compressability of the newly inserted table part. 
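As a brief sketch of the per-table `merge_workload` and `mutation_workload` merge tree settings documented above (the table and workload names below are invented for illustration):

```sql
-- Hypothetical illustration: direct background merges and mutations of one
-- table into dedicated workloads instead of the server-wide defaults.
CREATE TABLE events
(
    ts      DateTime,
    payload String
)
ENGINE = MergeTree
ORDER BY ts
SETTINGS merge_workload = 'production_merges',
         mutation_workload = 'production_mutations';

-- The values can also be changed on an existing table:
ALTER TABLE events MODIFY SETTING merge_workload = 'development_merges';
```

If these table-level settings are left empty, the server-level `merge_workload` and `mutation_workload` settings described earlier apply instead.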
diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 670c9c6cbf1..530023df5b7 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -1428,6 +1428,13 @@ Average block bytes output by parquet reader. Lowering the configuration in the Default value: `65409 * 256 = 16744704` +### output_format_parquet_write_page_index {#input_format_parquet_max_block_size} + +Could add page index into parquet files. To enable this, need set `output_format_parquet_use_custom_encoder`=`false` and +`output_format_parquet_write_page_index`=`true`. + +Enable by default. + ## Hive format settings {#hive-format-settings} ### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter} diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 79d0ca4f151..3d6d776f4da 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1592,19 +1592,19 @@ Default value: `default`. ## parallel_replicas_custom_key_range_lower {#parallel_replicas_custom_key_range_lower} -Allows the filter type `range` to split the work evenly between replicas based on the custom range `[parallel_replicas_custom_key_range_lower, INT_MAX]`. +Allows the filter type `range` to split the work evenly between replicas based on the custom range `[parallel_replicas_custom_key_range_lower, INT_MAX]`. -When used in conjuction with [parallel_replicas_custom_key_range_upper](#parallel_replicas_custom_key_range_upper), it lets the filter evenly split the work over replicas for the range `[parallel_replicas_custom_key_range_lower, parallel_replicas_custom_key_range_upper]`. +When used in conjuction with [parallel_replicas_custom_key_range_upper](#parallel_replicas_custom_key_range_upper), it lets the filter evenly split the work over replicas for the range `[parallel_replicas_custom_key_range_lower, parallel_replicas_custom_key_range_upper]`. -Note: This setting will not cause any additional data to be filtered during query processing, rather it changes the points at which the range filter breaks up the range `[0, INT_MAX]` for parallel processing. +Note: This setting will not cause any additional data to be filtered during query processing, rather it changes the points at which the range filter breaks up the range `[0, INT_MAX]` for parallel processing. ## parallel_replicas_custom_key_range_upper {#parallel_replicas_custom_key_range_upper} Allows the filter type `range` to split the work evenly between replicas based on the custom range `[0, parallel_replicas_custom_key_range_upper]`. A value of 0 disables the upper bound, setting it the max value of the custom key expression. -When used in conjuction with [parallel_replicas_custom_key_range_lower](#parallel_replicas_custom_key_range_lower), it lets the filter evenly split the work over replicas for the range `[parallel_replicas_custom_key_range_lower, parallel_replicas_custom_key_range_upper]`. +When used in conjuction with [parallel_replicas_custom_key_range_lower](#parallel_replicas_custom_key_range_lower), it lets the filter evenly split the work over replicas for the range `[parallel_replicas_custom_key_range_lower, parallel_replicas_custom_key_range_upper]`. -Note: This setting will not cause any additional data to be filtered during query processing, rather it changes the points at which the range filter breaks up the range `[0, INT_MAX]` for parallel processing. 
+Note: This setting will not cause any additional data to be filtered during query processing, rather it changes the points at which the range filter breaks up the range `[0, INT_MAX]` for parallel processing. ## allow_experimental_parallel_reading_from_replicas @@ -3188,7 +3188,7 @@ Default value: `0`. ## lightweight_deletes_sync {#lightweight_deletes_sync} -The same as 'mutation_sync', but controls only execution of lightweight deletes. +The same as 'mutation_sync', but controls only execution of lightweight deletes. Possible values: @@ -5150,7 +5150,7 @@ Allows using statistic to optimize the order of [prewhere conditions](../../sql- ## analyze_index_with_space_filling_curves -If a table has a space-filling curve in its index, e.g. `ORDER BY mortonEncode(x, y)`, and the query has conditions on its arguments, e.g. `x >= 10 AND x <= 20 AND y >= 20 AND y <= 30`, use the space-filling curve for index analysis. +If a table has a space-filling curve in its index, e.g. `ORDER BY mortonEncode(x, y)` or `ORDER BY hilbertEncode(x, y)`, and the query has conditions on its arguments, e.g. `x >= 10 AND x <= 20 AND y >= 20 AND y <= 30`, use the space-filling curve for index analysis. ## query_plan_enable_optimizations {#query_plan_enable_optimizations} @@ -5418,6 +5418,15 @@ When set to `false` than all attempts are made with identical timeouts. Default value: `true`. +## allow_deprecated_snowflake_conversion_functions {#allow_deprecated_snowflake_conversion_functions} + +Functions `snowflakeToDateTime`, `snowflakeToDateTime64`, `dateTimeToSnowflake`, and `dateTime64ToSnowflake` are deprecated and disabled by default. +Please use functions `snowflakeIDToDateTime`, `snowflakeIDToDateTime64`, `dateTimeToSnowflakeID`, and `dateTime64ToSnowflakeID` instead. + +To re-enable the deprecated functions (e.g., during a transition period), please set this setting to `true`. + +Default value: `false` + ## allow_experimental_variant_type {#allow_experimental_variant_type} Allows creation of experimental [Variant](../../sql-reference/data-types/variant.md). diff --git a/docs/en/operations/system-tables/error_log.md b/docs/en/operations/system-tables/error_log.md new file mode 100644 index 00000000000..15edef58662 --- /dev/null +++ b/docs/en/operations/system-tables/error_log.md @@ -0,0 +1,39 @@ +--- +slug: /en/operations/system-tables/error_log +--- +# error_log + +Contains history of error values from table `system.errors`, periodically flushed to disk. + +Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. +- `code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code number of the error. +- `error` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) - Name of the error. +- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of times this error happened. +- `remote` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Remote exception (i.e. received during one of the distributed queries). 
+ +**Example** + +``` sql +SELECT * FROM system.error_log LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +hostname: clickhouse.eu-central1.internal +event_date: 2024-06-18 +event_time: 2024-06-18 07:32:39 +code: 999 +error: KEEPER_EXCEPTION +value: 2 +remote: 0 +``` + +**See also** + +- [error_log setting](../../operations/server-configuration-parameters/settings.md#error_log) — Enabling and disabling the setting. +- [system.errors](../../operations/system-tables/errors.md) — Contains error codes with the number of times they have been triggered. +- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index 75b855966a3..47094eec3f0 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -113,6 +113,8 @@ Columns: - `used_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `functions`, which were used during query execution. - `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages`, which were used during query execution. - `used_table_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table functions`, which were used during query execution. +- `used_privileges` ([Array(String)](../../sql-reference/data-types/array.md)) - Privileges which were successfully checked during query execution. +- `missing_privileges` ([Array(String)](../../sql-reference/data-types/array.md)) - Privileges that are missing during query execution. - `query_cache_usage` ([Enum8](../../sql-reference/data-types/enum.md)) — Usage of the [query cache](../query-cache.md) during query execution. Values: - `'Unknown'` = Status unknown. - `'None'` = The query result was neither written into nor read from the query cache. @@ -194,6 +196,8 @@ used_formats: [] used_functions: [] used_storages: [] used_table_functions: [] +used_privileges: [] +missing_privileges: [] query_cache_usage: None ``` diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index ed22679a3e6..df041f5885e 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -36,9 +36,24 @@ $ echo 0 | sudo tee /proc/sys/vm/overcommit_memory Use `perf top` to watch the time spent in the kernel for memory management. Permanent huge pages also do not need to be allocated. -:::warning -If your system has less than 16 GB of RAM, you may experience various memory exceptions because default settings do not match this amount of memory. The recommended amount of RAM is 32 GB or more. You can use ClickHouse in a system with a small amount of RAM, even with 2 GB of RAM, but it requires additional tuning and can ingest at a low rate. -::: +### Using less than 16GB of RAM + +The recommended amount of RAM is 32 GB or more. + +If your system has less than 16 GB of RAM, you may experience various memory exceptions because default settings do not match this amount of memory. You can use ClickHouse in a system with a small amount of RAM (as low as 2 GB), but these setups require additional tuning and can only ingest at a low rate. + +When using ClickHouse with less than 16GB of RAM, we recommend the following: + +- Lower the size of the mark cache in the `config.xml`. It can be set as low as 500 MB, but it cannot be set to zero. +- Lower the number of query processing threads down to `1`. 
+- Lower the `max_block_size` to `8192`. Values as low as `1024` can still be practical.
+- Lower `max_download_threads` to `1`.
+- Set `input_format_parallel_parsing` and `output_format_parallel_formatting` to `0`.
+
+Additional notes:
+- To flush the memory cached by the memory allocator, you can run the `SYSTEM JEMALLOC PURGE`
+command.
+- We do not recommend using S3 or Kafka integrations on low-memory machines because they require significant memory for buffers.
 
 ## Storage Subsystem {#storage-subsystem}
 
diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md
index 93a3fecf3c6..f19643a3fa5 100644
--- a/docs/en/operations/utilities/clickhouse-local.md
+++ b/docs/en/operations/utilities/clickhouse-local.md
@@ -236,10 +236,10 @@ Read 2 rows, 32.00 B in 0.000 sec., 5182 rows/sec., 80.97 KiB/sec.
 Previous example is the same as:
 
 ``` bash
-$ echo -e "1,2\n3,4" | clickhouse-local --query "
+$ echo -e "1,2\n3,4" | clickhouse-local -n --query "
     CREATE TABLE table (a Int64, b Int64) ENGINE = File(CSV, stdin);
     SELECT a, b FROM table;
-    DROP TABLE table"
+    DROP TABLE table;"
 Read 2 rows, 32.00 B in 0.000 sec., 4987 rows/sec., 77.93 KiB/sec.
 1 2
 3 4
diff --git a/docs/en/operations/workload-scheduling.md b/docs/en/operations/workload-scheduling.md
index 24149099892..08629492ec6 100644
--- a/docs/en/operations/workload-scheduling.md
+++ b/docs/en/operations/workload-scheduling.md
@@ -47,6 +47,8 @@ Queries can be marked with setting `workload` to distinguish different workloads. If `workload` is not set, than value "default" is used. Note that you are able to specify the other value using settings profiles. Setting constraints can be used to make `workload` constant if you want all queries from the user to be marked with fixed value of `workload` setting.
 
+It is possible to assign a `workload` setting for background activities. Merges and mutations use the `merge_workload` and `mutation_workload` server settings, respectively. These values can also be overridden for specific tables using the `merge_workload` and `mutation_workload` merge tree settings.
+
 Let's consider an example of a system with two different workloads: "production" and "development".
```sql @@ -151,6 +153,9 @@ Example: ``` - ## See also - [system.scheduler](/docs/en/operations/system-tables/scheduler.md) + - [merge_workload](/docs/en/operations/settings/merge-tree-settings.md#merge_workload) merge tree setting + - [merge_workload](/docs/en/operations/server-configuration-parameters/settings.md#merge_workload) global server setting + - [mutation_workload](/docs/en/operations/settings/merge-tree-settings.md#mutation_workload) merge tree setting + - [mutation_workload](/docs/en/operations/server-configuration-parameters/settings.md#mutation_workload) global server setting diff --git a/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md b/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md index d9b44b3ff07..56e54d3faf9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md +++ b/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/analysis_of_variance -sidebar_position: 6 +sidebar_position: 101 --- # analysisOfVariance diff --git a/docs/en/sql-reference/aggregate-functions/reference/any.md b/docs/en/sql-reference/aggregate-functions/reference/any.md index f1b5a6683e5..cdff7dde4a9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/any.md +++ b/docs/en/sql-reference/aggregate-functions/reference/any.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/any -sidebar_position: 6 +sidebar_position: 102 --- # any diff --git a/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md b/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md index 9fbc21910f8..9c6e6b5fead 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md +++ b/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/anyheavy -sidebar_position: 103 +sidebar_position: 104 --- # anyHeavy diff --git a/docs/en/sql-reference/aggregate-functions/reference/anylast.md b/docs/en/sql-reference/aggregate-functions/reference/anylast.md index 8fcee2cf8e6..e43bc07fbdc 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/anylast.md +++ b/docs/en/sql-reference/aggregate-functions/reference/anylast.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/anylast -sidebar_position: 104 +sidebar_position: 105 --- # anyLast diff --git a/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md b/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md index b6d0806f35d..8f093cfdb61 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md +++ b/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/anylast_respect_nulls -sidebar_position: 104 +sidebar_position: 106 --- # anyLast_respect_nulls diff --git a/docs/en/sql-reference/aggregate-functions/reference/approxtopk.md b/docs/en/sql-reference/aggregate-functions/reference/approxtopk.md index 2bb43a9f665..ea2083ebd04 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/approxtopk.md +++ b/docs/en/sql-reference/aggregate-functions/reference/approxtopk.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/approxtopk -sidebar_position: 212 +sidebar_position: 107 --- # approx_top_k diff --git 
a/docs/en/sql-reference/aggregate-functions/reference/approxtopsum.md b/docs/en/sql-reference/aggregate-functions/reference/approxtopsum.md index aa884b26d8e..639142331f0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/approxtopsum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/approxtopsum.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/approxtopsum -sidebar_position: 212 +sidebar_position: 108 --- # approx_top_sum diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmax.md b/docs/en/sql-reference/aggregate-functions/reference/argmax.md index 2274dd4a5dc..8c6b2b532e8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmax.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/argmax -sidebar_position: 106 +sidebar_position: 109 --- # argMax diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmin.md b/docs/en/sql-reference/aggregate-functions/reference/argmin.md index 297744fb1db..0ab21fe2b52 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmin.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/argmin -sidebar_position: 105 +sidebar_position: 110 --- # argMin diff --git a/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md b/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md index 3c71129bdb5..c0ac0db33f3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/array_concat_agg -sidebar_position: 110 +sidebar_position: 111 --- # array_concat_agg diff --git a/docs/en/sql-reference/aggregate-functions/reference/avg.md b/docs/en/sql-reference/aggregate-functions/reference/avg.md index 5463d8a1874..7789c30bfe0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/avg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/avg.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/avg -sidebar_position: 5 +sidebar_position: 112 --- # avg diff --git a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md index 99d3bac763d..304d0407d98 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/avgweighted -sidebar_position: 107 +sidebar_position: 113 --- # avgWeighted diff --git a/docs/en/sql-reference/aggregate-functions/reference/boundrat.md b/docs/en/sql-reference/aggregate-functions/reference/boundrat.md index f3907af8030..d253a250600 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/boundrat.md +++ b/docs/en/sql-reference/aggregate-functions/reference/boundrat.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/boundingRatio -sidebar_position: 2 +sidebar_position: 114 title: boundingRatio --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md b/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md index 57edb47950a..7983c3f2e60 100644 --- 
a/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md +++ b/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/categoricalinformationvalue -sidebar_position: 250 +sidebar_position: 115 title: categoricalInformationValue --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/contingency.md b/docs/en/sql-reference/aggregate-functions/reference/contingency.md index 902c1f4af80..a49ff22febc 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/contingency.md +++ b/docs/en/sql-reference/aggregate-functions/reference/contingency.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/contingency -sidebar_position: 350 +sidebar_position: 116 --- # contingency diff --git a/docs/en/sql-reference/aggregate-functions/reference/corr.md b/docs/en/sql-reference/aggregate-functions/reference/corr.md index 5681c942169..c43b4d3b25a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/corr.md +++ b/docs/en/sql-reference/aggregate-functions/reference/corr.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/corr -sidebar_position: 107 +sidebar_position: 117 --- # corr diff --git a/docs/en/sql-reference/aggregate-functions/reference/corrmatrix.md b/docs/en/sql-reference/aggregate-functions/reference/corrmatrix.md index 718477b28dd..96978863646 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/corrmatrix.md +++ b/docs/en/sql-reference/aggregate-functions/reference/corrmatrix.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/corrmatrix -sidebar_position: 108 +sidebar_position: 118 --- # corrMatrix diff --git a/docs/en/sql-reference/aggregate-functions/reference/corrstable.md b/docs/en/sql-reference/aggregate-functions/reference/corrstable.md index b35442a32b6..979cf244245 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/corrstable.md +++ b/docs/en/sql-reference/aggregate-functions/reference/corrstable.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/corrstable -sidebar_position: 107 +sidebar_position: 119 --- # corrStable diff --git a/docs/en/sql-reference/aggregate-functions/reference/count.md b/docs/en/sql-reference/aggregate-functions/reference/count.md index ca4067c8d8c..e6f2cdd6aa9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/count.md +++ b/docs/en/sql-reference/aggregate-functions/reference/count.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/count -sidebar_position: 1 +sidebar_position: 120 --- # count diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarpop.md b/docs/en/sql-reference/aggregate-functions/reference/covarpop.md index 78b9f4cffea..7231f92b8fa 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarpop.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/covarpop -sidebar_position: 37 +sidebar_position: 121 --- # covarPop diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarpopmatrix.md b/docs/en/sql-reference/aggregate-functions/reference/covarpopmatrix.md index d7400599a49..c8811b3811e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarpopmatrix.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarpopmatrix.md @@ -1,6 +1,6 @@ --- slug: 
/en/sql-reference/aggregate-functions/reference/covarpopmatrix -sidebar_position: 36 +sidebar_position: 122 --- # covarPopMatrix diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarpopstable.md b/docs/en/sql-reference/aggregate-functions/reference/covarpopstable.md index 68e78fc3bd8..48e5368faac 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarpopstable.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarpopstable.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/covarpopstable -sidebar_position: 36 +sidebar_position: 123 --- # covarPopStable diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md b/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md index 7d5d5d13f35..92fe213b407 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/covarsamp -sidebar_position: 37 +sidebar_position: 124 --- # covarSamp diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarsampmatrix.md b/docs/en/sql-reference/aggregate-functions/reference/covarsampmatrix.md index b71d753f0be..1585c4a9970 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarsampmatrix.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarsampmatrix.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/covarsampmatrix -sidebar_position: 38 +sidebar_position: 125 --- # covarSampMatrix diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarsampstable.md b/docs/en/sql-reference/aggregate-functions/reference/covarsampstable.md index 3e6867b96d6..6764877768e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarsampstable.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarsampstable.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/covarsampstable -sidebar_position: 37 +sidebar_position: 126 --- # covarSampStable diff --git a/docs/en/sql-reference/aggregate-functions/reference/cramersv.md b/docs/en/sql-reference/aggregate-functions/reference/cramersv.md index 2424ff95237..db0e1c5eb4c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/cramersv.md +++ b/docs/en/sql-reference/aggregate-functions/reference/cramersv.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/cramersv -sidebar_position: 351 +sidebar_position: 127 --- # cramersV diff --git a/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md b/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md index 939c04e3fdc..2ff7ce489d3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md +++ b/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/cramersvbiascorrected -sidebar_position: 352 +sidebar_position: 128 --- # cramersVBiasCorrected diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md index 37d9d08cbdb..650135ecfeb 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/deltasum -sidebar_position: 141 
+sidebar_position: 129 --- # deltaSum diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md b/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md index c51d86389b0..ec5cfa5fecc 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/deltasumtimestamp -sidebar_position: 141 +sidebar_position: 130 title: deltaSumTimestamp --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/entropy.md b/docs/en/sql-reference/aggregate-functions/reference/entropy.md index fc8d627ecab..7970cdd268b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/entropy.md +++ b/docs/en/sql-reference/aggregate-functions/reference/entropy.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/entropy -sidebar_position: 302 +sidebar_position: 131 --- # entropy diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md index 75041ace7a3..3086a48f819 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md @@ -1,7 +1,7 @@ --- -slug: /en/sql-reference/aggregate-functions/reference/exponentialmovingaverage -sidebar_position: 108 -sidebar_title: exponentialMovingAverage +slug: /en/sql-reference/aggregate-functions/reference/exponentialMovingAverage +sidebar_position: 132 +title: exponentialMovingAverage --- ## exponentialMovingAverage @@ -96,56 +96,56 @@ Result: ``` text ┌─value─┬─time─┬─round(exp_smooth, 3)─┬─bar────────────────────────────────────────┐ -│ 1 │ 0 │ 0.067 │ ███▎ │ +│ 1 │ 0 │ 0.067 │ ███▎ │ │ 0 │ 1 │ 0.062 │ ███ │ -│ 0 │ 2 │ 0.058 │ ██▊ │ -│ 0 │ 3 │ 0.054 │ ██▋ │ +│ 0 │ 2 │ 0.058 │ ██▊ │ +│ 0 │ 3 │ 0.054 │ ██▋ │ │ 0 │ 4 │ 0.051 │ ██▌ │ -│ 0 │ 5 │ 0.047 │ ██▎ │ -│ 0 │ 6 │ 0.044 │ ██▏ │ +│ 0 │ 5 │ 0.047 │ ██▎ │ +│ 0 │ 6 │ 0.044 │ ██▏ │ │ 0 │ 7 │ 0.041 │ ██ │ -│ 0 │ 8 │ 0.038 │ █▊ │ -│ 0 │ 9 │ 0.036 │ █▋ │ -│ 0 │ 10 │ 0.033 │ █▋ │ +│ 0 │ 8 │ 0.038 │ █▊ │ +│ 0 │ 9 │ 0.036 │ █▋ │ +│ 0 │ 10 │ 0.033 │ █▋ │ │ 0 │ 11 │ 0.031 │ █▌ │ -│ 0 │ 12 │ 0.029 │ █▍ │ -│ 0 │ 13 │ 0.027 │ █▎ │ -│ 0 │ 14 │ 0.025 │ █▎ │ -│ 0 │ 15 │ 0.024 │ █▏ │ +│ 0 │ 12 │ 0.029 │ █▍ │ +│ 0 │ 13 │ 0.027 │ █▎ │ +│ 0 │ 14 │ 0.025 │ █▎ │ +│ 0 │ 15 │ 0.024 │ █▏ │ │ 0 │ 16 │ 0.022 │ █ │ │ 0 │ 17 │ 0.021 │ █ │ -│ 0 │ 18 │ 0.019 │ ▊ │ -│ 0 │ 19 │ 0.018 │ ▊ │ -│ 0 │ 20 │ 0.017 │ ▋ │ -│ 0 │ 21 │ 0.016 │ ▋ │ -│ 0 │ 22 │ 0.015 │ ▋ │ -│ 0 │ 23 │ 0.014 │ ▋ │ -│ 0 │ 24 │ 0.013 │ ▋ │ -│ 1 │ 25 │ 0.079 │ ███▊ │ +│ 0 │ 18 │ 0.019 │ ▊ │ +│ 0 │ 19 │ 0.018 │ ▊ │ +│ 0 │ 20 │ 0.017 │ ▋ │ +│ 0 │ 21 │ 0.016 │ ▋ │ +│ 0 │ 22 │ 0.015 │ ▋ │ +│ 0 │ 23 │ 0.014 │ ▋ │ +│ 0 │ 24 │ 0.013 │ ▋ │ +│ 1 │ 25 │ 0.079 │ ███▊ │ │ 1 │ 26 │ 0.14 │ ███████ │ -│ 1 │ 27 │ 0.198 │ █████████▊ │ +│ 1 │ 27 │ 0.198 │ █████████▊ │ │ 1 │ 28 │ 0.252 │ ████████████▌ │ │ 1 │ 29 │ 0.302 │ ███████████████ │ -│ 1 │ 30 │ 0.349 │ █████████████████▍ │ +│ 1 │ 30 │ 0.349 │ █████████████████▍ │ │ 1 │ 31 │ 0.392 │ ███████████████████▌ │ -│ 1 │ 32 │ 0.433 │ █████████████████████▋ │ +│ 1 │ 32 │ 0.433 │ █████████████████████▋ │ │ 1 │ 33 │ 0.471 │ ███████████████████████▌ │ -│ 1 │ 34 │ 0.506 │ █████████████████████████▎ │ -│ 1 │ 35 │ 0.539 │ ██████████████████████████▊ │ +│ 1 │ 34 │ 0.506 │ 
█████████████████████████▎ │ +│ 1 │ 35 │ 0.539 │ ██████████████████████████▊ │ │ 1 │ 36 │ 0.57 │ ████████████████████████████▌ │ -│ 1 │ 37 │ 0.599 │ █████████████████████████████▊ │ -│ 1 │ 38 │ 0.626 │ ███████████████████████████████▎ │ +│ 1 │ 37 │ 0.599 │ █████████████████████████████▊ │ +│ 1 │ 38 │ 0.626 │ ███████████████████████████████▎ │ │ 1 │ 39 │ 0.651 │ ████████████████████████████████▌ │ -│ 1 │ 40 │ 0.674 │ █████████████████████████████████▋ │ -│ 1 │ 41 │ 0.696 │ ██████████████████████████████████▋ │ -│ 1 │ 42 │ 0.716 │ ███████████████████████████████████▋ │ -│ 1 │ 43 │ 0.735 │ ████████████████████████████████████▋ │ -│ 1 │ 44 │ 0.753 │ █████████████████████████████████████▋ │ -│ 1 │ 45 │ 0.77 │ ██████████████████████████████████████▍ │ -│ 1 │ 46 │ 0.785 │ ███████████████████████████████████████▎ │ -│ 1 │ 47 │ 0.8 │ ███████████████████████████████████████▊ │ -│ 1 │ 48 │ 0.813 │ ████████████████████████████████████████▋ │ -│ 1 │ 49 │ 0.825 │ █████████████████████████████████████████▎│ +│ 1 │ 40 │ 0.674 │ █████████████████████████████████▋ │ +│ 1 │ 41 │ 0.696 │ ██████████████████████████████████▋ │ +│ 1 │ 42 │ 0.716 │ ███████████████████████████████████▋ │ +│ 1 │ 43 │ 0.735 │ ████████████████████████████████████▋ │ +│ 1 │ 44 │ 0.753 │ █████████████████████████████████████▋ │ +│ 1 │ 45 │ 0.77 │ ██████████████████████████████████████▍ │ +│ 1 │ 46 │ 0.785 │ ███████████████████████████████████████▎ │ +│ 1 │ 47 │ 0.8 │ ███████████████████████████████████████▊ │ +│ 1 │ 48 │ 0.813 │ ████████████████████████████████████████▋ │ +│ 1 │ 49 │ 0.825 │ █████████████████████████████████████████▎ │ └───────┴──────┴──────────────────────┴────────────────────────────────────────────┘ ``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedavg.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedavg.md new file mode 100644 index 00000000000..c729552749a --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedavg.md @@ -0,0 +1,105 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/exponentialTimeDecayedAvg +sidebar_position: 133 +title: exponentialTimeDecayedAvg +--- + +## exponentialTimeDecayedAvg + +Returns the exponentially smoothed weighted moving average of values of a time series at point `t` in time. + +**Syntax** + +```sql +exponentialTimeDecayedAvg(x)(v, t) +``` + +**Arguments** + +- `v` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `t` — Time. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md), [DateTime](../../data-types/datetime.md), [DateTime64](../../data-types/datetime64.md). + +**Parameters** + +- `x` — Half-life period. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). + +**Returned values** + +- Returns an exponentially smoothed weighted moving average at index `t` in time. [Float64](../../data-types/float.md). 
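+
+**Implementation details**
+
+The exact weighting formula is not restated on this page, but the output in the example below is consistent with each value observed at time `t_i` being weighted by `exp(-(t - t_i) / x)` and the function returning the weighted mean of the values. The query below is only an illustrative cross-check under that assumption: it recomputes the second row of the example (`time = 1`, after observing `1` at `time = 0` and `0` at `time = 1`) by hand.
+
+```sql
+-- Illustrative cross-check, assuming weights of exp(-(t - t_i) / x) with x = 10:
+-- weighted mean of the values 1 (at t = 0) and 0 (at t = 1), evaluated at t = 1.
+SELECT
+    (1 * exp(-(1 - 0) / 10) + 0 * exp(-(1 - 1) / 10))
+    / (exp(-(1 - 0) / 10) + exp(-(1 - 1) / 10)) AS manual_avg; -- ≈ 0.475, matching the example output
+```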
+ +**Examples** + +Query: + +```sql +SELECT + value, + time, + round(exp_smooth, 3), + bar(exp_smooth, 0, 5, 50) AS bar +FROM + ( + SELECT + (number = 0) OR (number >= 25) AS value, + number AS time, + exponentialTimeDecayedAvg(10)(value, time) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS exp_smooth + FROM numbers(50) + ); +``` + +Response: + +```sql + ┌─value─┬─time─┬─round(exp_smooth, 3)─┬─bar────────┐ +1. │ 1 │ 0 │ 1 │ ██████████ │ +2. │ 0 │ 1 │ 0.475 │ ████▊ │ +3. │ 0 │ 2 │ 0.301 │ ███ │ +4. │ 0 │ 3 │ 0.214 │ ██▏ │ +5. │ 0 │ 4 │ 0.162 │ █▌ │ +6. │ 0 │ 5 │ 0.128 │ █▎ │ +7. │ 0 │ 6 │ 0.104 │ █ │ +8. │ 0 │ 7 │ 0.086 │ ▊ │ +9. │ 0 │ 8 │ 0.072 │ ▋ │ +0. │ 0 │ 9 │ 0.061 │ ▌ │ +1. │ 0 │ 10 │ 0.052 │ ▌ │ +2. │ 0 │ 11 │ 0.045 │ ▍ │ +3. │ 0 │ 12 │ 0.039 │ ▍ │ +4. │ 0 │ 13 │ 0.034 │ ▎ │ +5. │ 0 │ 14 │ 0.03 │ ▎ │ +6. │ 0 │ 15 │ 0.027 │ ▎ │ +7. │ 0 │ 16 │ 0.024 │ ▏ │ +8. │ 0 │ 17 │ 0.021 │ ▏ │ +9. │ 0 │ 18 │ 0.018 │ ▏ │ +0. │ 0 │ 19 │ 0.016 │ ▏ │ +1. │ 0 │ 20 │ 0.015 │ ▏ │ +2. │ 0 │ 21 │ 0.013 │ ▏ │ +3. │ 0 │ 22 │ 0.012 │ │ +4. │ 0 │ 23 │ 0.01 │ │ +5. │ 0 │ 24 │ 0.009 │ │ +6. │ 1 │ 25 │ 0.111 │ █ │ +7. │ 1 │ 26 │ 0.202 │ ██ │ +8. │ 1 │ 27 │ 0.283 │ ██▊ │ +9. │ 1 │ 28 │ 0.355 │ ███▌ │ +0. │ 1 │ 29 │ 0.42 │ ████▏ │ +1. │ 1 │ 30 │ 0.477 │ ████▊ │ +2. │ 1 │ 31 │ 0.529 │ █████▎ │ +3. │ 1 │ 32 │ 0.576 │ █████▊ │ +4. │ 1 │ 33 │ 0.618 │ ██████▏ │ +5. │ 1 │ 34 │ 0.655 │ ██████▌ │ +6. │ 1 │ 35 │ 0.689 │ ██████▉ │ +7. │ 1 │ 36 │ 0.719 │ ███████▏ │ +8. │ 1 │ 37 │ 0.747 │ ███████▍ │ +9. │ 1 │ 38 │ 0.771 │ ███████▋ │ +0. │ 1 │ 39 │ 0.793 │ ███████▉ │ +1. │ 1 │ 40 │ 0.813 │ ████████▏ │ +2. │ 1 │ 41 │ 0.831 │ ████████▎ │ +3. │ 1 │ 42 │ 0.848 │ ████████▍ │ +4. │ 1 │ 43 │ 0.862 │ ████████▌ │ +5. │ 1 │ 44 │ 0.876 │ ████████▊ │ +6. │ 1 │ 45 │ 0.888 │ ████████▉ │ +7. │ 1 │ 46 │ 0.898 │ ████████▉ │ +8. │ 1 │ 47 │ 0.908 │ █████████ │ +9. │ 1 │ 48 │ 0.917 │ █████████▏ │ +0. │ 1 │ 49 │ 0.925 │ █████████▏ │ + └───────┴──────┴──────────────────────┴────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedcount.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedcount.md new file mode 100644 index 00000000000..b73d6c2503d --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedcount.md @@ -0,0 +1,104 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/exponentialTimeDecayedCount +sidebar_position: 134 +title: exponentialTimeDecayedCount +--- + +## exponentialTimeDecayedCount + +Returns the cumulative exponential decay over a time series at the index `t` in time. + +**Syntax** + +```sql +exponentialTimeDecayedCount(x)(t) +``` + +**Arguments** + +- `t` — Time. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md), [DateTime](../../data-types/datetime.md), [DateTime64](../../data-types/datetime64.md). + +**Parameters** + +- `x` — Half-life period. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). + +**Returned values** + +- Returns the cumulative exponential decay at the given point in time. [Float64](../../data-types/float.md). 
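+
+**Implementation details**
+
+The output in the example below behaves as if every row seen so far contributes a weight of `exp(-(t - t_i) / x)` and the function returns the sum of those weights. The query below is only an illustrative cross-check under that assumption: it recomputes the third row of the example (`time = 2`, after rows at `time = 0, 1, 2`) by hand.
+
+```sql
+-- Illustrative cross-check, assuming each earlier row contributes exp(-(t - t_i) / x) with x = 10.
+SELECT
+    exp(-(2 - 0) / 10) + exp(-(2 - 1) / 10) + exp(-(2 - 2) / 10) AS manual_count; -- ≈ 2.724, matching the example output
+```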
+ +**Example** + +Query: + +```sql +SELECT + value, + time, + round(exp_smooth, 3), + bar(exp_smooth, 0, 20, 50) AS bar +FROM +( + SELECT + (number % 5) = 0 AS value, + number AS time, + exponentialTimeDecayedCount(10)(time) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS exp_smooth + FROM numbers(50) +); +``` + +Result: + +```response + ┌─value─┬─time─┬─round(exp_smooth, 3)─┬─bar────────────────────────┐ + 1. │ 1 │ 0 │ 1 │ ██▌ │ + 2. │ 0 │ 1 │ 1.905 │ ████▊ │ + 3. │ 0 │ 2 │ 2.724 │ ██████▊ │ + 4. │ 0 │ 3 │ 3.464 │ ████████▋ │ + 5. │ 0 │ 4 │ 4.135 │ ██████████▎ │ + 6. │ 1 │ 5 │ 4.741 │ ███████████▊ │ + 7. │ 0 │ 6 │ 5.29 │ █████████████▏ │ + 8. │ 0 │ 7 │ 5.787 │ ██████████████▍ │ + 9. │ 0 │ 8 │ 6.236 │ ███████████████▌ │ +10. │ 0 │ 9 │ 6.643 │ ████████████████▌ │ +11. │ 1 │ 10 │ 7.01 │ █████████████████▌ │ +12. │ 0 │ 11 │ 7.343 │ ██████████████████▎ │ +13. │ 0 │ 12 │ 7.644 │ ███████████████████ │ +14. │ 0 │ 13 │ 7.917 │ ███████████████████▊ │ +15. │ 0 │ 14 │ 8.164 │ ████████████████████▍ │ +16. │ 1 │ 15 │ 8.387 │ ████████████████████▉ │ +17. │ 0 │ 16 │ 8.589 │ █████████████████████▍ │ +18. │ 0 │ 17 │ 8.771 │ █████████████████████▉ │ +19. │ 0 │ 18 │ 8.937 │ ██████████████████████▎ │ +20. │ 0 │ 19 │ 9.086 │ ██████████████████████▋ │ +21. │ 1 │ 20 │ 9.222 │ ███████████████████████ │ +22. │ 0 │ 21 │ 9.344 │ ███████████████████████▎ │ +23. │ 0 │ 22 │ 9.455 │ ███████████████████████▋ │ +24. │ 0 │ 23 │ 9.555 │ ███████████████████████▉ │ +25. │ 0 │ 24 │ 9.646 │ ████████████████████████ │ +26. │ 1 │ 25 │ 9.728 │ ████████████████████████▎ │ +27. │ 0 │ 26 │ 9.802 │ ████████████████████████▌ │ +28. │ 0 │ 27 │ 9.869 │ ████████████████████████▋ │ +29. │ 0 │ 28 │ 9.93 │ ████████████████████████▊ │ +30. │ 0 │ 29 │ 9.985 │ ████████████████████████▉ │ +31. │ 1 │ 30 │ 10.035 │ █████████████████████████ │ +32. │ 0 │ 31 │ 10.08 │ █████████████████████████▏ │ +33. │ 0 │ 32 │ 10.121 │ █████████████████████████▎ │ +34. │ 0 │ 33 │ 10.158 │ █████████████████████████▍ │ +35. │ 0 │ 34 │ 10.191 │ █████████████████████████▍ │ +36. │ 1 │ 35 │ 10.221 │ █████████████████████████▌ │ +37. │ 0 │ 36 │ 10.249 │ █████████████████████████▌ │ +38. │ 0 │ 37 │ 10.273 │ █████████████████████████▋ │ +39. │ 0 │ 38 │ 10.296 │ █████████████████████████▋ │ +40. │ 0 │ 39 │ 10.316 │ █████████████████████████▊ │ +41. │ 1 │ 40 │ 10.334 │ █████████████████████████▊ │ +42. │ 0 │ 41 │ 10.351 │ █████████████████████████▉ │ +43. │ 0 │ 42 │ 10.366 │ █████████████████████████▉ │ +44. │ 0 │ 43 │ 10.379 │ █████████████████████████▉ │ +45. │ 0 │ 44 │ 10.392 │ █████████████████████████▉ │ +46. │ 1 │ 45 │ 10.403 │ ██████████████████████████ │ +47. │ 0 │ 46 │ 10.413 │ ██████████████████████████ │ +48. │ 0 │ 47 │ 10.422 │ ██████████████████████████ │ +49. │ 0 │ 48 │ 10.43 │ ██████████████████████████ │ +50. 
│ 0 │ 49 │ 10.438 │ ██████████████████████████ │ + └───────┴──────┴──────────────────────┴────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedmax.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedmax.md new file mode 100644 index 00000000000..06dc5313904 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedmax.md @@ -0,0 +1,105 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/exponentialTimeDecayedMax +sidebar_position: 135 +title: exponentialTimeDecayedMax +--- + +## exponentialTimeDecayedMax + +Returns the maximum of the computed exponentially smoothed moving average at index `t` in time with that at `t-1`. + +**Syntax** + +```sql +exponentialTimeDecayedMax(x)(value, timeunit) +``` + +**Arguments** + +- `value` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `timeunit` — Timeunit. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md), [DateTime](../../data-types/datetime.md), [DateTime64](../../data-types/datetime64.md). + +**Parameters** + +- `x` — Half-life period. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). + +**Returned values** + +- Returns the maximum of the exponentially smoothed weighted moving average at `t` and `t-1`. [Float64](../../data-types/float.md). + +**Example** + +Query: + +```sql +SELECT + value, + time, + round(exp_smooth, 3), + bar(exp_smooth, 0, 5, 50) AS bar +FROM + ( + SELECT + (number = 0) OR (number >= 25) AS value, + number AS time, + exponentialTimeDecayedMax(10)(value, time) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS exp_smooth + FROM numbers(50) + ); +``` + +Result: + +```response + ┌─value─┬─time─┬─round(exp_smooth, 3)─┬─bar────────┐ + 1. │ 1 │ 0 │ 1 │ ██████████ │ + 2. │ 0 │ 1 │ 0.905 │ █████████ │ + 3. │ 0 │ 2 │ 0.819 │ ████████▏ │ + 4. │ 0 │ 3 │ 0.741 │ ███████▍ │ + 5. │ 0 │ 4 │ 0.67 │ ██████▋ │ + 6. │ 0 │ 5 │ 0.607 │ ██████ │ + 7. │ 0 │ 6 │ 0.549 │ █████▍ │ + 8. │ 0 │ 7 │ 0.497 │ ████▉ │ + 9. │ 0 │ 8 │ 0.449 │ ████▍ │ +10. │ 0 │ 9 │ 0.407 │ ████ │ +11. │ 0 │ 10 │ 0.368 │ ███▋ │ +12. │ 0 │ 11 │ 0.333 │ ███▎ │ +13. │ 0 │ 12 │ 0.301 │ ███ │ +14. │ 0 │ 13 │ 0.273 │ ██▋ │ +15. │ 0 │ 14 │ 0.247 │ ██▍ │ +16. │ 0 │ 15 │ 0.223 │ ██▏ │ +17. │ 0 │ 16 │ 0.202 │ ██ │ +18. │ 0 │ 17 │ 0.183 │ █▊ │ +19. │ 0 │ 18 │ 0.165 │ █▋ │ +20. │ 0 │ 19 │ 0.15 │ █▍ │ +21. │ 0 │ 20 │ 0.135 │ █▎ │ +22. │ 0 │ 21 │ 0.122 │ █▏ │ +23. │ 0 │ 22 │ 0.111 │ █ │ +24. │ 0 │ 23 │ 0.1 │ █ │ +25. │ 0 │ 24 │ 0.091 │ ▉ │ +26. │ 1 │ 25 │ 1 │ ██████████ │ +27. │ 1 │ 26 │ 1 │ ██████████ │ +28. │ 1 │ 27 │ 1 │ ██████████ │ +29. │ 1 │ 28 │ 1 │ ██████████ │ +30. │ 1 │ 29 │ 1 │ ██████████ │ +31. │ 1 │ 30 │ 1 │ ██████████ │ +32. │ 1 │ 31 │ 1 │ ██████████ │ +33. │ 1 │ 32 │ 1 │ ██████████ │ +34. │ 1 │ 33 │ 1 │ ██████████ │ +35. │ 1 │ 34 │ 1 │ ██████████ │ +36. │ 1 │ 35 │ 1 │ ██████████ │ +37. │ 1 │ 36 │ 1 │ ██████████ │ +38. │ 1 │ 37 │ 1 │ ██████████ │ +39. │ 1 │ 38 │ 1 │ ██████████ │ +40. │ 1 │ 39 │ 1 │ ██████████ │ +41. │ 1 │ 40 │ 1 │ ██████████ │ +42. │ 1 │ 41 │ 1 │ ██████████ │ +43. │ 1 │ 42 │ 1 │ ██████████ │ +44. │ 1 │ 43 │ 1 │ ██████████ │ +45. 
│ 1 │ 44 │ 1 │ ██████████ │ +46. │ 1 │ 45 │ 1 │ ██████████ │ +47. │ 1 │ 46 │ 1 │ ██████████ │ +48. │ 1 │ 47 │ 1 │ ██████████ │ +49. │ 1 │ 48 │ 1 │ ██████████ │ +50. │ 1 │ 49 │ 1 │ ██████████ │ + └───────┴──────┴──────────────────────┴────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedsum.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedsum.md new file mode 100644 index 00000000000..617cd265dac --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedsum.md @@ -0,0 +1,105 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/exponentialTimeDecayedSum +sidebar_position: 136 +title: exponentialTimeDecayedSum +--- + +## exponentialTimeDecayedSum + +Returns the sum of exponentially smoothed moving average values of a time series at the index `t` in time. + +**Syntax** + +```sql +exponentialTimeDecayedSum(x)(v, t) +``` + +**Arguments** + +- `v` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `t` — Time. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md), [DateTime](../../data-types/datetime.md), [DateTime64](../../data-types/datetime64.md). + +**Parameters** + +- `x` — Half-life period. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). + +**Returned values** + +- Returns the sum of exponentially smoothed moving average values at the given point in time. [Float64](../../data-types/float.md). + +**Example** + +Query: + +```sql +SELECT + value, + time, + round(exp_smooth, 3), + bar(exp_smooth, 0, 10, 50) AS bar +FROM + ( + SELECT + (number = 0) OR (number >= 25) AS value, + number AS time, + exponentialTimeDecayedSum(10)(value, time) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS exp_smooth + FROM numbers(50) + ); +``` + +Result: + +```response + ┌─value─┬─time─┬─round(exp_smooth, 3)─┬─bar───────────────────────────────────────────────┐ + 1. │ 1 │ 0 │ 1 │ █████ │ + 2. │ 0 │ 1 │ 0.905 │ ████▌ │ + 3. │ 0 │ 2 │ 0.819 │ ████ │ + 4. │ 0 │ 3 │ 0.741 │ ███▋ │ + 5. │ 0 │ 4 │ 0.67 │ ███▎ │ + 6. │ 0 │ 5 │ 0.607 │ ███ │ + 7. │ 0 │ 6 │ 0.549 │ ██▋ │ + 8. │ 0 │ 7 │ 0.497 │ ██▍ │ + 9. │ 0 │ 8 │ 0.449 │ ██▏ │ +10. │ 0 │ 9 │ 0.407 │ ██ │ +11. │ 0 │ 10 │ 0.368 │ █▊ │ +12. │ 0 │ 11 │ 0.333 │ █▋ │ +13. │ 0 │ 12 │ 0.301 │ █▌ │ +14. │ 0 │ 13 │ 0.273 │ █▎ │ +15. │ 0 │ 14 │ 0.247 │ █▏ │ +16. │ 0 │ 15 │ 0.223 │ █ │ +17. │ 0 │ 16 │ 0.202 │ █ │ +18. │ 0 │ 17 │ 0.183 │ ▉ │ +19. │ 0 │ 18 │ 0.165 │ ▊ │ +20. │ 0 │ 19 │ 0.15 │ ▋ │ +21. │ 0 │ 20 │ 0.135 │ ▋ │ +22. │ 0 │ 21 │ 0.122 │ ▌ │ +23. │ 0 │ 22 │ 0.111 │ ▌ │ +24. │ 0 │ 23 │ 0.1 │ ▌ │ +25. │ 0 │ 24 │ 0.091 │ ▍ │ +26. │ 1 │ 25 │ 1.082 │ █████▍ │ +27. │ 1 │ 26 │ 1.979 │ █████████▉ │ +28. │ 1 │ 27 │ 2.791 │ █████████████▉ │ +29. │ 1 │ 28 │ 3.525 │ █████████████████▋ │ +30. │ 1 │ 29 │ 4.19 │ ████████████████████▉ │ +31. │ 1 │ 30 │ 4.791 │ ███████████████████████▉ │ +32. │ 1 │ 31 │ 5.335 │ ██████████████████████████▋ │ +33. │ 1 │ 32 │ 5.827 │ █████████████████████████████▏ │ +34. │ 1 │ 33 │ 6.273 │ ███████████████████████████████▎ │ +35. │ 1 │ 34 │ 6.676 │ █████████████████████████████████▍ │ +36. 
│ 1 │ 35 │ 7.041 │ ███████████████████████████████████▏ │ +37. │ 1 │ 36 │ 7.371 │ ████████████████████████████████████▊ │ +38. │ 1 │ 37 │ 7.669 │ ██████████████████████████████████████▎ │ +39. │ 1 │ 38 │ 7.939 │ ███████████████████████████████████████▋ │ +40. │ 1 │ 39 │ 8.184 │ ████████████████████████████████████████▉ │ +41. │ 1 │ 40 │ 8.405 │ ██████████████████████████████████████████ │ +42. │ 1 │ 41 │ 8.605 │ ███████████████████████████████████████████ │ +43. │ 1 │ 42 │ 8.786 │ ███████████████████████████████████████████▉ │ +44. │ 1 │ 43 │ 8.95 │ ████████████████████████████████████████████▊ │ +45. │ 1 │ 44 │ 9.098 │ █████████████████████████████████████████████▍ │ +46. │ 1 │ 45 │ 9.233 │ ██████████████████████████████████████████████▏ │ +47. │ 1 │ 46 │ 9.354 │ ██████████████████████████████████████████████▊ │ +48. │ 1 │ 47 │ 9.464 │ ███████████████████████████████████████████████▎ │ +49. │ 1 │ 48 │ 9.563 │ ███████████████████████████████████████████████▊ │ +50. │ 1 │ 49 │ 9.653 │ ████████████████████████████████████████████████▎ │ + └───────┴──────┴──────────────────────┴───────────────────────────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/first_value.md b/docs/en/sql-reference/aggregate-functions/reference/first_value.md index 0c26b66c64a..2cd0e1fa16f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/first_value.md +++ b/docs/en/sql-reference/aggregate-functions/reference/first_value.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/first_value -sidebar_position: 7 +sidebar_position: 137 --- # first_value diff --git a/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md b/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md index ae17153085c..4abb3e03226 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md +++ b/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md @@ -1,6 +1,6 @@ --- -slug: /en/sql-reference/aggregate-functions/reference/flamegraph -sidebar_position: 110 +slug: /en/sql-reference/aggregate-functions/reference/flame_graph +sidebar_position: 138 --- # flameGraph diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparray.md b/docs/en/sql-reference/aggregate-functions/reference/grouparray.md index a38e35a72ad..1a87e3aeba9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparray.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparray.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/grouparray -sidebar_position: 110 +sidebar_position: 139 --- # groupArray diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md index d745e8a0e7a..c6b23c2f808 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/grouparrayinsertat -sidebar_position: 112 +sidebar_position: 140 --- # groupArrayInsertAt diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparrayintersect.md b/docs/en/sql-reference/aggregate-functions/reference/grouparrayintersect.md index 5cac88be073..a370f595923 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparrayintersect.md +++ 
b/docs/en/sql-reference/aggregate-functions/reference/grouparrayintersect.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/grouparrayintersect -sidebar_position: 115 +sidebar_position: 141 --- # groupArrayIntersect diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md index 9b48ee54ecd..ff62dcdde9b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/grouparraylast -sidebar_position: 110 +sidebar_position: 142 --- # groupArrayLast diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md index 32c0608afeb..6b6c4830535 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/grouparraymovingavg -sidebar_position: 114 +sidebar_position: 143 --- # groupArrayMovingAvg diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md index 6f2a60dd080..d1fa6fce9b0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/grouparraymovingsum -sidebar_position: 113 +sidebar_position: 144 --- # groupArrayMovingSum diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md index 393087161df..38ddae48ee7 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/grouparraysample -sidebar_position: 114 +sidebar_position: 145 --- # groupArraySample diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md index 9bee0c29e7a..22a150bb8fb 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md @@ -1,6 +1,7 @@ - --- - toc_priority: 112 - --- +--- +slug: /en/sql-reference/aggregate-functions/reference/grouparraysorted +sidebar_position: 146 +--- # groupArraySorted {#groupArraySorted} diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md index 3d833555a43..eee383d84e9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupbitand -sidebar_position: 125 +sidebar_position: 147 --- # groupBitAnd diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md index 02b9e0e8821..23b686e29b2 100644 --- 
a/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupbitmap -sidebar_position: 128 +sidebar_position: 148 --- # groupBitmap diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md index 1e649645e75..77bbf7d3d2c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupbitmapand -sidebar_position: 129 +sidebar_position: 149 title: groupBitmapAnd --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md index c88c80ceff2..7bb3dc689e8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupbitmapor -sidebar_position: 130 +sidebar_position: 150 title: groupBitmapOr --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md index aa24b3d2128..3212e94a47b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupbitmapxor -sidebar_position: 131 +sidebar_position: 151 title: groupBitmapXor --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md index 138ee998405..802b839d56e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupbitor -sidebar_position: 126 +sidebar_position: 152 --- # groupBitOr diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md index 168335a010c..94891891d64 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupbitxor -sidebar_position: 127 +sidebar_position: 153 --- # groupBitXor diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md b/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md index fe5f714c307..0462f4a4ab2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupuniqarray -sidebar_position: 111 +sidebar_position: 154 --- # groupUniqArray diff --git a/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md b/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md index 5990345b765..66e23a716ba 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md +++ 
b/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/intervalLengthSum -sidebar_position: 146 +sidebar_position: 155 sidebar_label: intervalLengthSum title: intervalLengthSum --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md b/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md index d159eec7ce6..33afcdfbf38 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest -sidebar_position: 300 +sidebar_position: 156 sidebar_label: kolmogorovSmirnovTest --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md b/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md index e1a29973fcf..c543831addc 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/kurtpop -sidebar_position: 153 +sidebar_position: 157 --- # kurtPop diff --git a/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md b/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md index 911c2bfbe74..57e80729454 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/kurtsamp -sidebar_position: 154 +sidebar_position: 158 --- # kurtSamp diff --git a/docs/en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets.md b/docs/en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets.md index 4f73aadb8da..673f3cb69c7 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets.md +++ b/docs/en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets -sidebar_position: 312 +sidebar_position: 159 sidebar_label: largestTriangleThreeBuckets --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/last_value.md b/docs/en/sql-reference/aggregate-functions/reference/last_value.md index 21a86a5f130..b2aa5c86d81 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/last_value.md +++ b/docs/en/sql-reference/aggregate-functions/reference/last_value.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/last_value -sidebar_position: 8 +sidebar_position: 160 --- # last_value diff --git a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md index af744f445d9..17f6afecde2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/mannwhitneyutest -sidebar_position: 310 +sidebar_position: 161 sidebar_label: mannWhitneyUTest --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/max.md b/docs/en/sql-reference/aggregate-functions/reference/max.md index 4bb2145d683..12c8800ef7f 100644 --- 
a/docs/en/sql-reference/aggregate-functions/reference/max.md +++ b/docs/en/sql-reference/aggregate-functions/reference/max.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/max -sidebar_position: 3 +sidebar_position: 162 title: max --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/maxintersections.md b/docs/en/sql-reference/aggregate-functions/reference/maxintersections.md index db99b900a3e..c65e31114ff 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/maxintersections.md +++ b/docs/en/sql-reference/aggregate-functions/reference/maxintersections.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/maxintersections -sidebar_position: 360 +sidebar_position: 163 title: maxIntersections --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/maxintersectionsposition.md b/docs/en/sql-reference/aggregate-functions/reference/maxintersectionsposition.md index 7dd63f09316..d5c2b0bd3c2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/maxintersectionsposition.md +++ b/docs/en/sql-reference/aggregate-functions/reference/maxintersectionsposition.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/maxintersectionsposition -sidebar_position: 361 +sidebar_position: 164 title: maxIntersectionsPosition --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/maxmap.md b/docs/en/sql-reference/aggregate-functions/reference/maxmap.md index 4d8c67e1b90..c9c6913249c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/maxmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/maxmap.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/maxmap -sidebar_position: 143 +sidebar_position: 165 --- # maxMap diff --git a/docs/en/sql-reference/aggregate-functions/reference/meanztest.md b/docs/en/sql-reference/aggregate-functions/reference/meanztest.md index 1cf2bebf26f..19afb5ae742 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/meanztest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/meanztest.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/meanztest -sidebar_position: 303 +sidebar_position: 166 sidebar_label: meanZTest --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/median.md b/docs/en/sql-reference/aggregate-functions/reference/median.md index 2a166c83dad..dcf174254ac 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/median.md +++ b/docs/en/sql-reference/aggregate-functions/reference/median.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/median -sidebar_position: 212 +sidebar_position: 167 --- # median diff --git a/docs/en/sql-reference/aggregate-functions/reference/min.md b/docs/en/sql-reference/aggregate-functions/reference/min.md index cca515b76e8..6bfcaf020c8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/min.md +++ b/docs/en/sql-reference/aggregate-functions/reference/min.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/min -sidebar_position: 2 +sidebar_position: 168 title: min --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/minmap.md b/docs/en/sql-reference/aggregate-functions/reference/minmap.md index 5436e1fc6a6..b1fbb9e49f3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/minmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/minmap.md @@ -1,6 +1,6 @@ --- slug: 
/en/sql-reference/aggregate-functions/reference/minmap -sidebar_position: 142 +sidebar_position: 169 --- # minMap diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantile.md b/docs/en/sql-reference/aggregate-functions/reference/quantile.md index 91b6b1b0d80..d5278125cbc 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantile.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantile.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantile -sidebar_position: 200 +sidebar_position: 170 --- # quantile diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileGK.md b/docs/en/sql-reference/aggregate-functions/reference/quantileGK.md index 7352781d126..9582f264a6f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileGK.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileGK.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantileGK -sidebar_position: 204 +sidebar_position: 175 --- # quantileGK diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md b/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md index 4377f2f1b17..4469438db6a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantilebfloat16 -sidebar_position: 209 +sidebar_position: 171 title: quantileBFloat16 --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md b/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md index f9acd2e20cb..fc9db7ef08d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantileddsketch -sidebar_position: 211 +sidebar_position: 171 title: quantileDD --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md index 7235c47da70..0ac4b5e3a51 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantiledeterministic -sidebar_position: 206 +sidebar_position: 172 --- # quantileDeterministic diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md index d7d7413c283..46873bcd2b6 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantileexact -sidebar_position: 202 +sidebar_position: 173 --- # quantileExact Functions diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md index 34def8d7411..4ce212888c4 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md @@ -1,6 +1,6 @@ --- slug: 
/en/sql-reference/aggregate-functions/reference/quantileexactweighted -sidebar_position: 203 +sidebar_position: 174 --- # quantileExactWeighted diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md index 41d2627fb7b..9eb4fde6102 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantileInterpolatedWeighted -sidebar_position: 203 +sidebar_position: 176 --- # quantileInterpolatedWeighted diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index 856d447ac13..e2c3295221d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantiles -sidebar_position: 201 +sidebar_position: 177 --- # quantiles Functions diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md index 796e87b02d8..ece54ca24ab 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantiletdigest -sidebar_position: 207 +sidebar_position: 178 --- # quantileTDigest diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md index b3e21e0e69e..7f8f7f53a97 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantiletdigestweighted -sidebar_position: 208 +sidebar_position: 179 --- # quantileTDigestWeighted diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md index b5b1c8a0c01..78050fe5b5e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantiletiming -sidebar_position: 204 +sidebar_position: 180 --- # quantileTiming diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md index df483aac01e..c5fff0825c3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantiletimingweighted -sidebar_position: 205 +sidebar_position: 181 --- # quantileTimingWeighted diff --git a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md index 27f2dd124e4..eb995923d97 100644 --- 
a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md +++ b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/rankCorr -sidebar_position: 145 +sidebar_position: 182 --- # rankCorr diff --git a/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md b/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md index ea3dbff8691..2aebccfdc53 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md +++ b/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/simplelinearregression -sidebar_position: 220 +sidebar_position: 183 --- # simpleLinearRegression diff --git a/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md b/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md index e39af77059a..21344b58ba6 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md +++ b/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/singlevalueornull -sidebar_position: 220 +sidebar_position: 184 --- # singleValueOrNull diff --git a/docs/en/sql-reference/aggregate-functions/reference/skewpop.md b/docs/en/sql-reference/aggregate-functions/reference/skewpop.md index 379fdcfa7c2..58ea33edb81 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/skewpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/skewpop.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/skewpop -sidebar_position: 150 +sidebar_position: 185 --- # skewPop diff --git a/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md b/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md index 9e64b186db3..9c32a0183ef 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/skewsamp -sidebar_position: 151 +sidebar_position: 186 --- # skewSamp diff --git a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md index 62edc221858..8791847ead0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/sparkbar -sidebar_position: 311 +sidebar_position: 187 sidebar_label: sparkbar --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md b/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md index d2406197ecc..e52a442d76a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/stddevpop -sidebar_position: 30 +sidebar_position: 188 --- # stddevPop @@ -25,7 +25,7 @@ stddevPop(x) **Returned value** -Square root of standard deviation of `x`. [Float64](../../data-types/float.md). +- Square root of standard deviation of `x`. [Float64](../../data-types/float.md). 
**Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/stddevpopstable.md b/docs/en/sql-reference/aggregate-functions/reference/stddevpopstable.md index a8ad5956ae8..2051ce7b125 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stddevpopstable.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stddevpopstable.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/stddevpopstable -sidebar_position: 30 +sidebar_position: 189 --- # stddevPopStable diff --git a/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md b/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md index cf8b9b20d63..e2cad40b267 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/stddevsamp -sidebar_position: 31 +sidebar_position: 190 --- # stddevSamp diff --git a/docs/en/sql-reference/aggregate-functions/reference/stddevsampstable.md b/docs/en/sql-reference/aggregate-functions/reference/stddevsampstable.md index 9ae1f5f8411..205e10cced5 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stddevsampstable.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stddevsampstable.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/stddevsampstable -sidebar_position: 31 +sidebar_position: 191 --- # stddevSampStable diff --git a/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md b/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md index 7ab9e1d3256..6cc5cbd8fe1 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/stochasticlinearregression -sidebar_position: 221 +sidebar_position: 192 --- # stochasticLinearRegression {#agg_functions_stochasticlinearregression_parameters} diff --git a/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md b/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md index 4bf5529ddcb..dca452a1702 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/stochasticlogisticregression -sidebar_position: 222 +sidebar_position: 193 --- # stochasticLogisticRegression diff --git a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md index fa320b4e336..1605e8efa13 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/studentttest -sidebar_position: 300 +sidebar_position: 194 sidebar_label: studentTTest --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/sum.md b/docs/en/sql-reference/aggregate-functions/reference/sum.md index a33a99f63e6..19636f003c7 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sum.md @@ -1,6 +1,6 @@ --- slug: 
/en/sql-reference/aggregate-functions/reference/sum -sidebar_position: 4 +sidebar_position: 195 --- # sum diff --git a/docs/en/sql-reference/aggregate-functions/reference/sumcount.md b/docs/en/sql-reference/aggregate-functions/reference/sumcount.md index a59b87022d6..ff4ddcec142 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sumcount.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sumcount.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/sumcount -sidebar_position: 144 +sidebar_position: 196 title: sumCount --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md b/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md index 1a729b18b42..ed58b3c3369 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/sumkahan -sidebar_position: 145 +sidebar_position: 197 title: sumKahan --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/summap.md b/docs/en/sql-reference/aggregate-functions/reference/summap.md index fd3f095511b..4ff937f1e4f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/summap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/summap.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/summap -sidebar_position: 141 +sidebar_position: 198 --- # sumMap diff --git a/docs/en/sql-reference/aggregate-functions/reference/summapwithoverflow.md b/docs/en/sql-reference/aggregate-functions/reference/summapwithoverflow.md index 7c0aa31e459..e36818e2ab8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/summapwithoverflow.md +++ b/docs/en/sql-reference/aggregate-functions/reference/summapwithoverflow.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/summapwithoverflow -sidebar_position: 141 +sidebar_position: 199 --- # sumMapWithOverflow diff --git a/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md b/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md index a120eafe738..5fe3cb7de8e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/sumwithoverflow -sidebar_position: 140 +sidebar_position: 200 --- # sumWithOverflow diff --git a/docs/en/sql-reference/aggregate-functions/reference/theilsu.md b/docs/en/sql-reference/aggregate-functions/reference/theilsu.md index ef19438a53a..73b063cf965 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/theilsu.md +++ b/docs/en/sql-reference/aggregate-functions/reference/theilsu.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/theilsu -sidebar_position: 353 +sidebar_position: 201 --- # theilsU diff --git a/docs/en/sql-reference/aggregate-functions/reference/topk.md b/docs/en/sql-reference/aggregate-functions/reference/topk.md index dd4b2251a8a..695e9b1d7d8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topk.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topk.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/topk -sidebar_position: 108 +sidebar_position: 202 --- # topK diff --git a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md 
b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md index d2a469828fc..148a8b6ea18 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/topkweighted -sidebar_position: 109 +sidebar_position: 203 --- # topKWeighted diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniq.md b/docs/en/sql-reference/aggregate-functions/reference/uniq.md index b1c8336630b..c1dc6a29e58 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniq.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniq.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/uniq -sidebar_position: 190 +sidebar_position: 204 --- # uniq diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md index 18f44d2fcc4..70bb4463140 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/uniqcombined -sidebar_position: 192 +sidebar_position: 205 --- # uniqCombined diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md index b6e09bcaae3..014984f6291 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/uniqcombined64 -sidebar_position: 193 +sidebar_position: 206 --- # uniqCombined64 diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md index fd68a464881..da4d4aa9588 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/uniqexact -sidebar_position: 191 +sidebar_position: 207 --- # uniqExact diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md index 8594ebb3782..78d84edf1be 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/uniqhll12 -sidebar_position: 194 +sidebar_position: 208 --- # uniqHLL12 diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md b/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md index 45970f144cb..fbae42117ee 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/uniqthetasketch -sidebar_position: 195 +sidebar_position: 209 title: uniqTheta --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/varpop.md b/docs/en/sql-reference/aggregate-functions/reference/varpop.md index 4e010248f6e..182e830f19f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varpop.md +++ 
b/docs/en/sql-reference/aggregate-functions/reference/varpop.md @@ -1,33 +1,28 @@ --- title: "varPop" -slug: "/en/sql-reference/aggregate-functions/reference/varpop" -sidebar_position: 32 +slug: "/en/sql-reference/aggregate-functions/reference/varPop" +sidebar_position: 210 --- -This page covers the `varPop` and `varPopStable` functions available in ClickHouse. - ## varPop -Calculates the population covariance between two data columns. The population covariance measures the degree to which two variables vary together. Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅`is the average value of `x`. +Calculates the population variance. **Syntax** ```sql -covarPop(x, y) +varPop(x) ``` +Alias: `VAR_POP`. + **Parameters** -- `x`: The first data column. [Numeric](../../../native-protocol/columns.md) -- `y`: The second data column. [Numeric](../../../native-protocol/columns.md) +- `x`: Population of values to find the population variance of. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md). **Returned value** -Returns an integer of type `Float64`. - -**Implementation details** - -This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varPopStable`](#varpopstable) function. +- Returns the population variance of `x`. [`Float64`](../../data-types/float.md). **Example** @@ -37,69 +32,21 @@ Query: DROP TABLE IF EXISTS test_data; CREATE TABLE test_data ( - x Int32, - y Int32 + x UInt8, ) ENGINE = Memory; -INSERT INTO test_data VALUES (1, 2), (2, 3), (3, 5), (4, 6), (5, 8); +INSERT INTO test_data VALUES (3), (3), (3), (4), (4), (5), (5), (7), (11), (15); SELECT - covarPop(x, y) AS covar_pop + varPop(x) AS var_pop FROM test_data; ``` Result: ```response -3 -``` - -## varPopStable - -Calculates population covariance between two data columns using a stable, numerically accurate method to calculate the variance. This function is designed to provide reliable results even with large datasets or values that might cause numerical instability in other implementations. - -**Syntax** - -```sql -covarPopStable(x, y) -``` - -**Parameters** - -- `x`: The first data column. [String literal](../../syntax#syntax-string-literal) -- `y`: The second data column. [Expression](../../syntax#syntax-expressions) - -**Returned value** - -Returns an integer of type `Float64`. - -**Implementation details** - -Unlike [`varPop`](#varpop), this function uses a stable, numerically accurate algorithm to calculate the population variance to avoid issues like catastrophic cancellation or loss of precision. This function also handles `NaN` and `Inf` values correctly, excluding them from calculations. 
- -**Example** - -Query: - -```sql -DROP TABLE IF EXISTS test_data; -CREATE TABLE test_data -( - x Int32, - y Int32 -) -ENGINE = Memory; - -INSERT INTO test_data VALUES (1, 2), (2, 9), (9, 5), (4, 6), (5, 8); - -SELECT - covarPopStable(x, y) AS covar_pop_stable -FROM test_data; -``` - -Result: - -```response -0.5999999999999999 +┌─var_pop─┐ +│ 14.4 │ +└─────────┘ ``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/varpopstable.md b/docs/en/sql-reference/aggregate-functions/reference/varpopstable.md new file mode 100644 index 00000000000..68037a5a533 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/varpopstable.md @@ -0,0 +1,52 @@ +--- +title: "varPopStable" +slug: "/en/sql-reference/aggregate-functions/reference/varpopstable" +sidebar_position: 211 +--- + +## varPopStable + +Returns the population variance. Unlike [`varPop`](../reference/varpop.md), this function uses a [numerically stable](https://en.wikipedia.org/wiki/Numerical_stability) algorithm. It works slower but provides a lower computational error. + +**Syntax** + +```sql +varPopStable(x) +``` + +Alias: `VAR_POP_STABLE`. + +**Parameters** + +- `x`: Population of values to find the population variance of. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md). + +**Returned value** + +- Returns the population variance of `x`. [Float64](../../data-types/float.md). + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS test_data; +CREATE TABLE test_data +( + x UInt8, +) +ENGINE = Memory; + +INSERT INTO test_data VALUES (3),(3),(3),(4),(4),(5),(5),(7),(11),(15); + +SELECT + varPopStable(x) AS var_pop_stable +FROM test_data; +``` + +Result: + +```response +┌─var_pop_stable─┐ +│ 14.4 │ +└────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md index bd1cfa5742a..87a97c15dd8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md @@ -1,11 +1,9 @@ --- title: "varSamp" -slug: /en/sql-reference/aggregate-functions/reference/varsamp -sidebar_position: 33 +slug: /en/sql-reference/aggregate-functions/reference/varSamp +sidebar_position: 212 --- -This page contains information on the `varSamp` and `varSampStable` ClickHouse functions. - ## varSamp Calculate the sample variance of a data set. @@ -13,24 +11,27 @@ Calculate the sample variance of a data set. **Syntax** ```sql -varSamp(expr) +varSamp(x) ``` +Alias: `VAR_SAMP`. + **Parameters** -- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../../syntax#syntax-expressions) +- `x`: The population for which you want to calculate the sample variance. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md). **Returned value** -Returns a Float64 value representing the sample variance of the input data set. + +- Returns the sample variance of the input data set `x`. [Float64](../../data-types/float.md). **Implementation details** -The `varSamp()` function calculates the sample variance using the following formula: +The `varSamp` function calculates the sample variance using the following formula: -```plaintext -∑(x - mean(x))^2 / (n - 1) -``` +$$ +\sum\frac{(x - \text{mean}(x))^2}{(n - 1)} +$$ Where: @@ -38,91 +39,29 @@ Where: - `mean(x)` is the arithmetic mean of the data set. 
- `n` is the number of data points in the data set. -The function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPop()` function](./varpop#varpop) instead. - -This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varSampStable`](#varsampstable) function. +The function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use [`varPop`](../reference/varpop.md) instead. **Example** Query: ```sql -CREATE TABLE example_table +DROP TABLE IF EXISTS test_data; +CREATE TABLE test_data ( - id UInt64, - value Float64 + x Float64 ) -ENGINE = MergeTree -ORDER BY id; +ENGINE = Memory; -INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7); +INSERT INTO test_data VALUES (10.5), (12.3), (9.8), (11.2), (10.7); -SELECT varSamp(value) FROM example_table; +SELECT round(varSamp(x),3) AS var_samp FROM test_data; ``` Response: ```response -0.8650000000000091 +┌─var_samp─┐ +│ 0.865 │ +└──────────┘ ``` - -## varSampStable - -Calculate the sample variance of a data set using a numerically stable algorithm. - -**Syntax** - -```sql -varSampStable(expr) -``` - -**Parameters** - -- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../../syntax#syntax-expressions) - -**Returned value** - -The `varSampStable` function returns a Float64 value representing the sample variance of the input data set. - -**Implementation details** - -The `varSampStable` function calculates the sample variance using the same formula as the [`varSamp`](#varsamp) function: - -```plaintext -∑(x - mean(x))^2 / (n - 1) -``` - -Where: -- `x` is each individual data point in the data set. -- `mean(x)` is the arithmetic mean of the data set. -- `n` is the number of data points in the data set. - -The difference between `varSampStable` and `varSamp` is that `varSampStable` is designed to provide a more deterministic and stable result when dealing with floating-point arithmetic. It uses an algorithm that minimizes the accumulation of rounding errors, which can be particularly important when dealing with large data sets or data with a wide range of values. - -Like `varSamp`, the `varSampStable` function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPopStable`](./varpop#varpopstable) function instead. - -**Example** - -Query: - -```sql -CREATE TABLE example_table -( - id UInt64, - value Float64 -) -ENGINE = MergeTree -ORDER BY id; - -INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7); - -SELECT varSampStable(value) FROM example_table; -``` - -Response: - -```response -0.865 -``` - -This query calculates the sample variance of the `value` column in the `example_table` using the `varSampStable()` function. The result shows that the sample variance of the values `[10.5, 12.3, 9.8, 11.2, 10.7]` is approximately 0.865, which may differ slightly from the result of `varSamp` due to the more precise handling of floating-point arithmetic. 
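As a quick sanity check of the sample-versus-population distinction described above (a hypothetical query, not part of this change; it assumes the `test_data` table from the `varSamp` example is still present), the two estimators differ only by an `n / (n - 1)` factor:

```sql
-- Hypothetical cross-check (not part of this diff); assumes test_data from the
-- varSamp example above: x Float64 with values 10.5, 12.3, 9.8, 11.2, 10.7.
SELECT
    round(varSamp(x), 3) AS var_samp,                            -- 0.865, divides by (n - 1)
    round(varPop(x), 3)  AS var_pop,                             -- 0.692, divides by n
    round(varPop(x) * count(x) / (count(x) - 1), 3) AS rescaled  -- 0.865, matches varSamp
FROM test_data;
```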
diff --git a/docs/en/sql-reference/aggregate-functions/reference/varsampstable.md b/docs/en/sql-reference/aggregate-functions/reference/varsampstable.md new file mode 100644 index 00000000000..ebe647e1951 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/varsampstable.md @@ -0,0 +1,63 @@ +--- +title: "varSampStable" +slug: /en/sql-reference/aggregate-functions/reference/varsampstable +sidebar_position: 213 +--- + +## varSampStable + +Calculate the sample variance of a data set. Unlike [`varSamp`](../reference/varsamp.md), this function uses a numerically stable algorithm. It works slower but provides a lower computational error. + +**Syntax** + +```sql +varSampStable(x) +``` + +Alias: `VAR_SAMP_STABLE` + +**Parameters** + +- `x`: The population for which you want to calculate the sample variance. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md). + +**Returned value** + +- Returns the sample variance of the input data set. [Float64](../../data-types/float.md). + +**Implementation details** + +The `varSampStable` function calculates the sample variance using the same formula as the [`varSamp`](../reference/varsamp.md): + +$$ +\sum\frac{(x - \text{mean}(x))^2}{(n - 1)} +$$ + +Where: +- `x` is each individual data point in the data set. +- `mean(x)` is the arithmetic mean of the data set. +- `n` is the number of data points in the data set. + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS test_data; +CREATE TABLE test_data +( + x Float64 +) +ENGINE = Memory; + +INSERT INTO test_data VALUES (10.5), (12.3), (9.8), (11.2), (10.7); + +SELECT round(varSampStable(x),3) AS var_samp_stable FROM test_data; +``` + +Response: + +```response +┌─var_samp_stable─┐ +│ 0.865 │ +└─────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md index 4f1085e65b4..296b70f758e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/welchttest -sidebar_position: 301 +sidebar_position: 214 sidebar_label: welchTTest --- diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index ac9a72c2641..250e766f2b7 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -137,7 +137,7 @@ If the time transition (due to daylight saving time or for other reasons) was pe Non-monotonic calendar dates. For example, in Happy Valley - Goose Bay, the time was transitioned one hour backwards at 00:01:00 7 Nov 2010 (one minute after midnight). So after 6th Nov has ended, people observed a whole one minute of 7th Nov, then time was changed back to 23:01 6th Nov and after another 59 minutes the 7th Nov started again. ClickHouse does not (yet) support this kind of fun. During these days the results of time processing functions may be slightly incorrect. -Similar issue exists for Casey Antarctic station in year 2010. They changed time three hours back at 5 Mar, 02:00. If you are working in antarctic station, please don't afraid to use ClickHouse. Just make sure you set timezone to UTC or be aware of inaccuracies. +Similar issue exists for Casey Antarctic station in year 2010. They changed time three hours back at 5 Mar, 02:00. 
If you are working in antarctic station, please don't be afraid to use ClickHouse. Just make sure you set timezone to UTC or be aware of inaccuracies. Time shifts for multiple days. Some pacific islands changed their timezone offset from UTC+14 to UTC-12. That's alright but some inaccuracies may present if you do calculations with their timezone for historical time points at the days of conversion. diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index 12098efc635..b9b5c6d7a05 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -8,7 +8,7 @@ sidebar_label: Mathematical ## e -Returns e ([Euler's constant](https://en.wikipedia.org/wiki/Euler%27s_constant)). +Returns $e$ ([Euler's constant](https://en.wikipedia.org/wiki/Euler%27s_constant)). **Syntax** @@ -22,7 +22,7 @@ Type: [Float64](../data-types/float.md). ## pi -Returns π ([Pi](https://en.wikipedia.org/wiki/Pi)). +Returns $\pi$ ([Pi](https://en.wikipedia.org/wiki/Pi)). **Syntax** @@ -35,7 +35,7 @@ Type: [Float64](../data-types/float.md). ## exp -Returns e to the power of the given argument. +Returns $e^{x}$, where x is the given argument to the function. **Syntax** @@ -47,6 +47,22 @@ exp(x) - `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +**Example** + +Query: + +```sql +SELECT round(exp(-1), 4); +``` + +Result: + +```response +┌─round(exp(-1), 4)─┐ +│ 0.3679 │ +└───────────────────┘ +``` + **Returned value** Type: [Float*](../data-types/float.md). @@ -91,7 +107,7 @@ Type: [Float*](../data-types/float.md). ## intExp2 -Like `exp` but returns a UInt64. +Like [`exp`](#exp) but returns a UInt64. **Syntax** @@ -137,7 +153,7 @@ Type: [Float*](../data-types/float.md). ## intExp10 -Like `exp10` but returns a UInt64. +Like [`exp10`](#exp10) but returns a UInt64. **Syntax** @@ -197,7 +213,7 @@ Type: [Float*](../data-types/float.md). ## erf -If `x` is non-negative, then `erf(x / σ√2)` is the probability that a random variable having a normal distribution with standard deviation `σ` takes the value that is separated from the expected value by more than `x`. +If `x` is non-negative, then $erf(\frac{x}{\sigma\sqrt{2}})$ is the probability that a random variable having a normal distribution with standard deviation $\sigma$ takes the value that is separated from the expected value by more than `x`. **Syntax** @@ -229,7 +245,7 @@ SELECT erf(3 / sqrt(2)); ## erfc -Returns a number close to `1 - erf(x)` without loss of precision for large ‘x’ values. +Returns a number close to $1-erf(x)$ without loss of precision for large `x` values. **Syntax** @@ -403,7 +419,7 @@ Type: [Float*](../data-types/float.md). ## pow -Returns `x` to the power of `y`. +Returns $x^y$. **Syntax** @@ -434,11 +450,11 @@ cosh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — The angle, in radians. Values from the interval: $-\infty \lt x \lt +\infty$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- Values from the interval: `1 <= cosh(x) < +∞`. +- Values from the interval: $1 \le cosh(x) \lt +\infty$. Type: [Float64](../data-types/float.md#float32-float64). @@ -468,11 +484,11 @@ acosh(x) **Arguments** -- `x` — Hyperbolic cosine of angle. 
Values from the interval: `1 <= x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — Hyperbolic cosine of angle. Values from the interval: $1 \le x \lt +\infty$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- The angle, in radians. Values from the interval: `0 <= acosh(x) < +∞`. +- The angle, in radians. Values from the interval: $0 \le acosh(x) \lt +\infty$. Type: [Float64](../data-types/float.md#float32-float64). @@ -502,11 +518,11 @@ sinh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — The angle, in radians. Values from the interval: $-\infty \lt x \lt +\infty$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- Values from the interval: `-∞ < sinh(x) < +∞`. +- Values from the interval: $-\infty \lt sinh(x) \lt +\infty$. Type: [Float64](../data-types/float.md#float32-float64). @@ -536,11 +552,11 @@ asinh(x) **Arguments** -- `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — Hyperbolic sine of angle. Values from the interval: $-\infty \lt x \lt +\infty$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- The angle, in radians. Values from the interval: `-∞ < asinh(x) < +∞`. +- The angle, in radians. Values from the interval: $-\infty \lt asinh(x) \lt +\infty$. Type: [Float64](../data-types/float.md#float32-float64). @@ -569,11 +585,11 @@ tanh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — The angle, in radians. Values from the interval: $-\infty \lt x \lt +\infty$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- Values from the interval: `-1 < tanh(x) < 1`. +- Values from the interval: $-1 \lt tanh(x) \lt 1$. Type: [Float*](../data-types/float.md#float32-float64). @@ -601,11 +617,11 @@ atanh(x) **Arguments** -- `x` — Hyperbolic tangent of angle. Values from the interval: `–1 < x < 1`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — Hyperbolic tangent of angle. Values from the interval: $-1 \lt x \lt 1$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- The angle, in radians. Values from the interval: `-∞ < atanh(x) < +∞`. +- The angle, in radians. Values from the interval: $-\infty \lt atanh(x) \lt +\infty$. Type: [Float64](../data-types/float.md#float32-float64). @@ -640,7 +656,7 @@ atan2(y, x) **Returned value** -- The angle `θ` such that `−π < θ ≤ π`, in radians. +- The angle `θ` such that $-\pi \lt 0 \le \pi$, in radians. Type: [Float64](../data-types/float.md#float32-float64). @@ -705,11 +721,11 @@ log1p(x) **Arguments** -- `x` — Values from the interval: `-1 < x < +∞`. 
[(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — Values from the interval: $-1 \lt x \lt +\infty$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- Values from the interval: `-∞ < log1p(x) < +∞`. +- Values from the interval: $-\infty < log1p(x) \lt +\infty$. Type: [Float64](../data-types/float.md#float32-float64). @@ -739,7 +755,7 @@ sign(x) **Arguments** -- `x` — Values from `-∞` to `+∞`. Support all numeric types in ClickHouse. +- `x` — Values from $-\infty$ to $+\infty$. Supports all numeric types in ClickHouse. **Returned value** @@ -804,7 +820,7 @@ sigmoid(x) **Parameters** -- `x` — input value. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — input value. Values from the interval: $-\infty \lt x \lt +\infty$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** diff --git a/docs/en/sql-reference/functions/nlp-functions.md b/docs/en/sql-reference/functions/nlp-functions.md index 4bfa181a35f..7057ebebfe4 100644 --- a/docs/en/sql-reference/functions/nlp-functions.md +++ b/docs/en/sql-reference/functions/nlp-functions.md @@ -6,26 +6,297 @@ sidebar_label: NLP (experimental) # Natural Language Processing (NLP) Functions -:::note +:::warning This is an experimental feature that is currently in development and is not ready for general use. It will change in unpredictable backwards-incompatible ways in future releases. Set `allow_experimental_nlp_functions = 1` to enable it. ::: +## detectCharset + +The `detectCharset` function detects the character set of the non-UTF8-encoded input string. + +*Syntax* + +``` sql +detectCharset('text_to_be_analyzed') +``` + +*Arguments* + +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). + +*Returned value* + +- A `String` containing the code of the detected character set + +*Examples* + +Query: + +```sql +SELECT detectCharset('Ich bleibe für ein paar Tage.'); +``` + +Result: + +```response +┌─detectCharset('Ich bleibe für ein paar Tage.')─┐ +│ WINDOWS-1252 │ +└────────────────────────────────────────────────┘ +``` + +## detectLanguage + +Detects the language of the UTF8-encoded input string. The function uses the [CLD2 library](https://github.com/CLD2Owners/cld2) for detection, and it returns the 2-letter ISO language code. + +The `detectLanguage` function works best when providing over 200 characters in the input string. + +*Syntax* + +``` sql +detectLanguage('text_to_be_analyzed') +``` + +*Arguments* + +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). + +*Returned value* + +- The 2-letter ISO code of the detected language + +Other possible results: + +- `un` = unknown, can not detect any language. +- `other` = the detected language does not have 2 letter code. + +*Examples* + +Query: + +```sql +SELECT detectLanguage('Je pense que je ne parviendrai jamais à parler français comme un natif. Where there’s a will, there’s a way.'); +``` + +Result: + +```response +fr +``` + +## detectLanguageMixed + +Similar to the `detectLanguage` function, but `detectLanguageMixed` returns a `Map` of 2-letter language codes that are mapped to the percentage of the certain language in the text. 
+ + +*Syntax* + +``` sql +detectLanguageMixed('text_to_be_analyzed') +``` + +*Arguments* + +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). + +*Returned value* + +- `Map(String, Float32)`: The keys are 2-letter ISO codes and the values are a percentage of text found for that language + + +*Examples* + +Query: + +```sql +SELECT detectLanguageMixed('二兎を追う者は一兎をも得ず二兎を追う者は一兎をも得ず A vaincre sans peril, on triomphe sans gloire.'); +``` + +Result: + +```response +┌─detectLanguageMixed()─┐ +│ {'ja':0.62,'fr':0.36 │ +└───────────────────────┘ +``` + +## detectProgrammingLanguage + +Determines the programming language from the source code. Calculates all the unigrams and bigrams of commands in the source code. +Then using a marked-up dictionary with weights of unigrams and bigrams of commands for various programming languages finds the biggest weight of the programming language and returns it. + +*Syntax* + +``` sql +detectProgrammingLanguage('source_code') +``` + +*Arguments* + +- `source_code` — String representation of the source code to analyze. [String](../data-types/string.md#string). + +*Returned value* + +- Programming language. [String](../data-types/string.md). + +*Examples* + +Query: + +```sql +SELECT detectProgrammingLanguage('#include '); +``` + +Result: + +```response +┌─detectProgrammingLanguage('#include ')─┐ +│ C++ │ +└──────────────────────────────────────────────────┘ +``` + +## detectLanguageUnknown + +Similar to the `detectLanguage` function, except the `detectLanguageUnknown` function works with non-UTF8-encoded strings. Prefer this version when your character set is UTF-16 or UTF-32. + + +*Syntax* + +``` sql +detectLanguageUnknown('text_to_be_analyzed') +``` + +*Arguments* + +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). + +*Returned value* + +- The 2-letter ISO code of the detected language + +Other possible results: + +- `un` = unknown, can not detect any language. +- `other` = the detected language does not have 2 letter code. + +*Examples* + +Query: + +```sql +SELECT detectLanguageUnknown('Ich bleibe für ein paar Tage.'); +``` + +Result: + +```response +┌─detectLanguageUnknown('Ich bleibe für ein paar Tage.')─┐ +│ de │ +└────────────────────────────────────────────────────────┘ +``` + +## detectTonality + +Determines the sentiment of text data. Uses a marked-up sentiment dictionary, in which each word has a tonality ranging from `-12` to `6`. +For each text, it calculates the average sentiment value of its words and returns it in the range `[-1,1]`. + +:::note +This function is limited in its current form. Currently it makes use of the embedded emotional dictionary at `/contrib/nlp-data/tonality_ru.zst` and only works for the Russian language. +::: + +*Syntax* + +``` sql +detectTonality(text) +``` + +*Arguments* + +- `text` — The text to be analyzed. [String](../data-types/string.md#string). + +*Returned value* + +- The average sentiment value of the words in `text`. [Float32](../data-types/float.md). 
+ +*Examples* + +Query: + +```sql +SELECT detectTonality('Шарик - хороший пёс'), -- Sharik is a good dog + detectTonality('Шарик - пёс'), -- Sharik is a dog + detectTonality('Шарик - плохой пёс'); -- Sharkik is a bad dog +``` + +Result: + +```response +┌─detectTonality('Шарик - хороший пёс')─┬─detectTonality('Шарик - пёс')─┬─detectTonality('Шарик - плохой пёс')─┐ +│ 0.44445 │ 0 │ -0.3 │ +└───────────────────────────────────────┴───────────────────────────────┴──────────────────────────────────────┘ +``` +## lemmatize + +Performs lemmatization on a given word. Needs dictionaries to operate, which can be obtained [here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models). + +*Syntax* + +``` sql +lemmatize('language', word) +``` + +*Arguments* + +- `language` — Language which rules will be applied. [String](../data-types/string.md#string). +- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../data-types/string.md#string). + +*Examples* + +Query: + +``` sql +SELECT lemmatize('en', 'wolves'); +``` + +Result: + +``` text +┌─lemmatize("wolves")─┐ +│ "wolf" │ +└─────────────────────┘ +``` + +*Configuration* + +This configuration specifies that the dictionary `en.bin` should be used for lemmatization of English (`en`) words. The `.bin` files can be downloaded from +[here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models). + +``` xml + + + + en + en.bin + + + +``` + ## stem Performs stemming on a given word. -### Syntax +*Syntax* ``` sql stem('language', word) ``` -### Arguments +*Arguments* - `language` — Language which rules will be applied. Use the two letter [ISO 639-1 code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). - `word` — word that needs to be stemmed. Must be in lowercase. [String](../data-types/string.md#string). -### Examples +*Examples* Query: @@ -40,7 +311,7 @@ Result: │ ['I','think','it','is','a','bless','in','disguis'] │ └────────────────────────────────────────────────────┘ ``` -### Supported languages for stem() +*Supported languages for stem()* :::note The stem() function uses the [Snowball stemming](https://snowballstem.org/) library, see the Snowball website for updated languages etc. @@ -76,53 +347,6 @@ The stem() function uses the [Snowball stemming](https://snowballstem.org/) libr - Turkish - Yiddish -## lemmatize - -Performs lemmatization on a given word. Needs dictionaries to operate, which can be obtained [here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models). - -### Syntax - -``` sql -lemmatize('language', word) -``` - -### Arguments - -- `language` — Language which rules will be applied. [String](../data-types/string.md#string). -- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../data-types/string.md#string). - -### Examples - -Query: - -``` sql -SELECT lemmatize('en', 'wolves'); -``` - -Result: - -``` text -┌─lemmatize("wolves")─┐ -│ "wolf" │ -└─────────────────────┘ -``` - -### Configuration - -This configuration specifies that the dictionary `en.bin` should be used for lemmatization of English (`en`) words. The `.bin` files can be downloaded from -[here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models). - -``` xml - - - - en - en.bin - - - -``` - ## synonyms Finds synonyms to a given word. There are two types of synonym extensions: `plain` and `wordnet`. 
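The `lemmatize` and `stem` sections above describe related but different operations: stemming strips suffixes mechanically, while lemmatization maps a word to its dictionary base form. A hypothetical side-by-side query (not taken from the source; exact outputs depend on the Snowball stemmer version and the configured lemmagen dictionary) illustrates the difference:

```sql
-- Hypothetical comparison; requires allow_experimental_nlp_functions = 1 and a
-- configured lemmatizer dictionary for 'en' (see the lemmatize configuration above).
SELECT
    stem('en', 'wolves')      AS stemmed,     -- e.g. 'wolv' (suffix stripping)
    lemmatize('en', 'wolves') AS lemmatized;  -- 'wolf' (dictionary base form)
```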
@@ -131,18 +355,18 @@ With the `plain` extension type we need to provide a path to a simple text file, With the `wordnet` extension type we need to provide a path to a directory with WordNet thesaurus in it. Thesaurus must contain a WordNet sense index. -### Syntax +*Syntax* ``` sql synonyms('extension_name', word) ``` -### Arguments +*Arguments* - `extension_name` — Name of the extension in which search will be performed. [String](../data-types/string.md#string). - `word` — Word that will be searched in extension. [String](../data-types/string.md#string). -### Examples +*Examples* Query: @@ -158,7 +382,7 @@ Result: └──────────────────────────────────────────┘ ``` -### Configuration +*Configuration* ``` xml @@ -172,154 +396,4 @@ Result: en/ -``` - -## detectLanguage - -Detects the language of the UTF8-encoded input string. The function uses the [CLD2 library](https://github.com/CLD2Owners/cld2) for detection, and it returns the 2-letter ISO language code. - -The `detectLanguage` function works best when providing over 200 characters in the input string. - -### Syntax - -``` sql -detectLanguage('text_to_be_analyzed') -``` - -### Arguments - -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). - -### Returned value - -- The 2-letter ISO code of the detected language - -Other possible results: - -- `un` = unknown, can not detect any language. -- `other` = the detected language does not have 2 letter code. - -### Examples - -Query: - -```sql -SELECT detectLanguage('Je pense que je ne parviendrai jamais à parler français comme un natif. Where there’s a will, there’s a way.'); -``` - -Result: - -```response -fr -``` - -## detectLanguageMixed - -Similar to the `detectLanguage` function, but `detectLanguageMixed` returns a `Map` of 2-letter language codes that are mapped to the percentage of the certain language in the text. - - -### Syntax - -``` sql -detectLanguageMixed('text_to_be_analyzed') -``` - -### Arguments - -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). - -### Returned value - -- `Map(String, Float32)`: The keys are 2-letter ISO codes and the values are a percentage of text found for that language - - -### Examples - -Query: - -```sql -SELECT detectLanguageMixed('二兎を追う者は一兎をも得ず二兎を追う者は一兎をも得ず A vaincre sans peril, on triomphe sans gloire.'); -``` - -Result: - -```response -┌─detectLanguageMixed()─┐ -│ {'ja':0.62,'fr':0.36 │ -└───────────────────────┘ -``` - -## detectLanguageUnknown - -Similar to the `detectLanguage` function, except the `detectLanguageUnknown` function works with non-UTF8-encoded strings. Prefer this version when your character set is UTF-16 or UTF-32. - - -### Syntax - -``` sql -detectLanguageUnknown('text_to_be_analyzed') -``` - -### Arguments - -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). - -### Returned value - -- The 2-letter ISO code of the detected language - -Other possible results: - -- `un` = unknown, can not detect any language. -- `other` = the detected language does not have 2 letter code. 
- -### Examples - -Query: - -```sql -SELECT detectLanguageUnknown('Ich bleibe für ein paar Tage.'); -``` - -Result: - -```response -┌─detectLanguageUnknown('Ich bleibe für ein paar Tage.')─┐ -│ de │ -└────────────────────────────────────────────────────────┘ -``` - -## detectCharset - -The `detectCharset` function detects the character set of the non-UTF8-encoded input string. - - -### Syntax - -``` sql -detectCharset('text_to_be_analyzed') -``` - -### Arguments - -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). - -### Returned value - -- A `String` containing the code of the detected character set - -### Examples - -Query: - -```sql -SELECT detectCharset('Ich bleibe für ein paar Tage.'); -``` - -Result: - -```response -┌─detectCharset('Ich bleibe für ein paar Tage.')─┐ -│ WINDOWS-1252 │ -└────────────────────────────────────────────────┘ -``` +``` \ No newline at end of file diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index e22dd5d827c..58fc1eba02e 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -3820,3 +3820,43 @@ Result: 10. │ df │ │ └────┴───────────────────────┘ ``` + +## displayName + +Returns the value of `display_name` from [config](../../operations/configuration-files.md/#configuration-files) or server Fully Qualified Domain Name (FQDN) if not set. + +**Syntax** + +```sql +displayName() +``` + +**Returned value** + +- Value of `display_name` from config or server FQDN if not set. [String](../data-types/string.md). + +**Example** + +The `display_name` can be set in `config.xml`. Taking for example a server with `display_name` configured to 'production': + +```xml + +production +``` + +Query: + +```sql +SELECT displayName(); +``` + +Result: + +```response +┌─displayName()─┐ +│ production │ +└───────────────┘ +``` + diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index c535b82d710..c068b0e9d17 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1168,14 +1168,14 @@ Result: └────────────────────────────┘ ``` -## base64UrlEncode +## base64URLEncode Encodes an URL (String or FixedString) as base64 with URL-specific modifications, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-5). **Syntax** ```sql -base64UrlEncode(url) +base64URLEncode(url) ``` **Arguments** @@ -1189,13 +1189,13 @@ base64UrlEncode(url) **Example** ``` sql -SELECT base64UrlEncode('https://clickhouse.com'); +SELECT base64URLEncode('https://clickhouse.com'); ``` Result: ```result -┌─base64UrlEncode('https://clickhouse.com')─┐ +┌─base64URLEncode('https://clickhouse.com')─┐ │ aHR0cDovL2NsaWNraG91c2UuY29t │ └───────────────────────────────────────────┘ ``` @@ -1234,19 +1234,19 @@ Result: └──────────────────────────────────┘ ``` -## base64UrlDecode +## base64URLDecode Accepts a base64-encoded URL and decodes it from base64 with URL-specific modifications, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-5). Throws an exception in case of an error. **Syntax** ```sql -base64UrlDecode(encodedUrl) +base64URLDecode(encodedUrl) ``` **Arguments** -- `encodedUrl` — [String](../data-types/string.md) column or constant. 
If the string is not a valid Base64-encoded value with URL-specific modifications, an exception is thrown.
+- `encodedURL` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, an exception is thrown.
 
 **Returned value**
 
@@ -1255,13 +1255,13 @@ base64UrlDecode(encodedUrl)
 **Example**
 
 ``` sql
-SELECT base64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t');
+SELECT base64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t');
 ```
 
 Result:
 
 ```result
-┌─base64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t')─┐
+┌─base64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t')─┐
 │ https://clickhouse.com │
 └─────────────────────────────────────────────────┘
 ```
@@ -1298,19 +1298,19 @@ SELECT tryBase64Decode('RW5jb2RlZA==') as res, tryBase64Decode('invalid') as res
 └────────────┴─────────────┘
 ```
 
-## tryBase64UrlDecode
+## tryBase64URLDecode
 
-Like `base64UrlDecode` but returns an empty string in case of error.
+Like `base64URLDecode` but returns an empty string in case of error.
 
 **Syntax**
 
 ```sql
-tryBase64UrlDecode(encodedUrl)
+tryBase64URLDecode(encodedURL)
 ```
 
 **Parameters**
 
-- `encodedUrl`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string.
+- `encodedURL`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string.
 
 **Returned value**
 
@@ -1321,7 +1321,7 @@ tryBase64UrlDecode(encodedUrl)
 Query:
 
 ```sql
-SELECT tryBase64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t') as res, tryBase64Decode('aHR0cHM6Ly9jbGlja') as res_invalid;
+SELECT tryBase64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t') as res, tryBase64URLDecode('aHR0cHM6Ly9jbGlja') as res_invalid;
 ```
 
 ```response
@@ -2178,6 +2178,32 @@ Result:
 
 Alias: levenshteinDistance
 
+## editDistanceUTF8
+
+Calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two UTF8 strings.
+
+**Syntax**
+
+```sql
+editDistanceUTF8(string1, string2)
+```
+
+**Examples**
+
+``` sql
+SELECT editDistanceUTF8('我是谁', '我是我');
+```
+
+Result:
+
+``` text
+┌─editDistanceUTF8('我是谁', '我是我')──┐
+│ 1 │
+└─────────────────────────────────────┘
+```
+
+Alias: levenshteinDistanceUTF8
+
 ## damerauLevenshteinDistance
 
 Calculates the [Damerau-Levenshtein distance](https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance) between two byte strings.
diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index 8b3e4f44840..76c0141ac8b 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -818,6 +818,40 @@ The same as above, but including query string and fragment.
 
 Example: `/top/news.html?page=2#comments`.
 
+### protocol
+
+Extracts the protocol from a URL.
+
+**Syntax**
+
+```sql
+protocol(url)
+```
+
+**Arguments**
+
+- `url` — URL to extract protocol from. [String](../data-types/string.md).
+
+**Returned value**
+
+- Protocol, or an empty string if it cannot be determined. [String](../data-types/string.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT protocol('https://clickhouse.com/');
+```
+
+Result:
+
+```response
+┌─protocol('https://clickhouse.com/')─┐
+│ https │
+└─────────────────────────────────────┘
+```
+
 ### queryString
 
 Returns the query string without the initial question mark, `#` and everything after `#`.
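The renamed URL-safe Base64 functions above compose naturally. Here is a minimal sketch of the round trip and of the forgiving `try` variant; the input strings are illustrative, not taken from the documentation:

```sql
-- Round trip through the URL-safe Base64 functions.
-- '~' is outside the base64url alphabet, so the try-variant
-- returns an empty string instead of throwing.
SELECT
    base64URLEncode('https://clickhouse.com/?a=1') AS encoded,
    base64URLDecode(encoded) AS decoded,
    tryBase64URLDecode('not~valid') AS safe;
```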
diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index 5f15907d029..e990023efbc 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -543,12 +543,17 @@ serverUUID()
 
 Generates a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID).
 
-The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond.
+The generated Snowflake ID contains the current Unix timestamp in milliseconds (41 + 1 top zero bits), followed by a machine id (10 bits), and a counter (12 bits) to distinguish IDs within a millisecond.
 For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes.
 In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0.
 
 Function `generateSnowflakeID` guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.
 
+:::note
+The generated Snowflake IDs are based on the UNIX epoch 1970-01-01.
+While no standard or recommendation exists for the epoch of Snowflake IDs, implementations in other systems may use a different epoch, e.g. Twitter/X (2010-11-04) or Mastodon (2015-01-01).
+:::
+
 ```
 0 1 2 3
 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
@@ -605,6 +610,11 @@ SELECT generateSnowflakeID(1), generateSnowflakeID(2);
 
 ## snowflakeToDateTime
 
+:::warning
+This function is deprecated and can only be used if setting [allow_deprecated_snowflake_conversion_functions](../../operations/settings/settings.md#allow_deprecated_snowflake_conversion_functions) is enabled.
+The function will be removed at some point in the future.
+:::
+
 Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime](../data-types/datetime.md) format.
 
 **Syntax**
 
@@ -641,6 +651,11 @@ Result:
 
 ## snowflakeToDateTime64
 
+:::warning
+This function is deprecated and can only be used if setting [allow_deprecated_snowflake_conversion_functions](../../operations/settings/settings.md#allow_deprecated_snowflake_conversion_functions) is enabled.
+The function will be removed at some point in the future.
+:::
+
 Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime64](../data-types/datetime64.md) format.
 
 **Syntax**
 
@@ -677,6 +692,11 @@ Result:
 
 ## dateTimeToSnowflake
 
+:::warning
+This function is deprecated and can only be used if setting [allow_deprecated_snowflake_conversion_functions](../../operations/settings/settings.md#allow_deprecated_snowflake_conversion_functions) is enabled.
+The function will be removed at some point in the future.
+:::
+
 Converts a [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time.
 
 **Syntax**
 
@@ -711,6 +731,11 @@ Result:
 
 ## dateTime64ToSnowflake
 
+:::warning
+This function is deprecated and can only be used if setting [allow_deprecated_snowflake_conversion_functions](../../operations/settings/settings.md#allow_deprecated_snowflake_conversion_functions) is enabled.
+The function will be removed at some point in the future.
+:::
+
 Convert a [DateTime64](../data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time.
 
 **Syntax**
 
@@ -743,6 +768,148 @@ Result:
 └─────────────────────────────┘
 ```
 
+## snowflakeIDToDateTime
+
+Returns the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as a value of type [DateTime](../data-types/datetime.md).
+
+**Syntax**
+
+``` sql
+snowflakeIDToDateTime(value[, epoch[, time_zone]])
+```
+
+**Arguments**
+
+- `value` — Snowflake ID. [UInt64](../data-types/int-uint.md).
+- `epoch` — Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2010-11-04), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md).
+- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The timestamp is returned in this timezone. Optional. [String](../data-types/string.md).
+
+**Returned value**
+
+- The timestamp component of `value` as a [DateTime](../data-types/datetime.md) value.
+
+**Example**
+
+Query:
+
+```sql
+SELECT snowflakeIDToDateTime(7204436857747984384) AS res
+```
+
+Result:
+
+```
+┌─────────────────res─┐
+│ 2024-06-06 10:59:58 │
+└─────────────────────┘
+```
+
+## snowflakeIDToDateTime64
+
+Returns the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as a value of type [DateTime64](../data-types/datetime64.md).
+
+**Syntax**
+
+``` sql
+snowflakeIDToDateTime64(value[, epoch[, time_zone]])
+```
+
+**Arguments**
+
+- `value` — Snowflake ID. [UInt64](../data-types/int-uint.md).
+- `epoch` — Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2010-11-04), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md).
+- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The timestamp is returned in this timezone. Optional. [String](../data-types/string.md).
+
+**Returned value**
+
+- The timestamp component of `value` as a [DateTime64](../data-types/datetime64.md) with scale = 3, i.e. millisecond precision.
+
+**Example**
+
+Query:
+
+```sql
+SELECT snowflakeIDToDateTime64(7204436857747984384) AS res
+```
+
+Result:
+
+```
+┌─────────────────res─┐
+│ 2024-06-06 10:59:58 │
+└─────────────────────┘
+```
+
+## dateTimeToSnowflakeID
+
+Converts a [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the given time.
+
+**Syntax**
+
+``` sql
+dateTimeToSnowflakeID(value[, epoch])
+```
+
+**Arguments**
+
+- `value` — Date with time. [DateTime](../data-types/datetime.md).
+- `epoch` — Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2010-11-04), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md).
+
+**Returned value**
+
+- Input value converted to [UInt64](../data-types/int-uint.md) as the first Snowflake ID at that time.
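Before the canonical example below, a quick illustrative sketch of how the optional `epoch` argument pairs `dateTimeToSnowflakeID` with `snowflakeIDToDateTime`; the values are assumed, using the Twitter/X epoch from the note above:

```sql
-- Build an ID against the Twitter/X epoch, then read it back.
-- 1288834974657 ms corresponds to 2010-11-04 (see the generateSnowflakeID note).
WITH 1288834974657 AS twitter_epoch
SELECT
    dateTimeToSnowflakeID(toDateTime('2024-06-06 10:59:58', 'UTC'), twitter_epoch) AS id,
    snowflakeIDToDateTime(id, twitter_epoch, 'UTC') AS round_trip;
```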
+
+**Example**
+
+Query:
+
+```sql
+SELECT toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt, dateTimeToSnowflakeID(dt) AS res;
+```
+
+Result:
+
+```
+┌──────────────────dt─┬─────────────────res─┐
+│ 2021-08-15 18:57:56 │ 6832626392367104000 │
+└─────────────────────┴─────────────────────┘
+```
+
+## dateTime64ToSnowflakeID
+
+Converts a [DateTime64](../data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the given time.
+
+**Syntax**
+
+``` sql
+dateTime64ToSnowflakeID(value[, epoch])
+```
+
+**Arguments**
+
+- `value` — Date with time. [DateTime64](../data-types/datetime64.md).
+- `epoch` — Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2010-11-04), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md).
+
+**Returned value**
+
+- Input value converted to [UInt64](../data-types/int-uint.md) as the first Snowflake ID at that time.
+
+**Example**
+
+Query:
+
+```sql
+SELECT toDateTime64('2021-08-15 18:57:56.493', 3, 'Asia/Shanghai') AS dt, dateTime64ToSnowflakeID(dt) AS res;
+```
+
+Result:
+
+```
+┌──────────────────────dt─┬─────────────────res─┐
+│ 2021-08-15 18:57:56.493 │ 6832626394434895872 │
+└─────────────────────────┴─────────────────────┘
+```
+
 ## See also
 
 - [dictGetUUID](../functions/ext-dict-functions.md#ext_dict_functions-other)
diff --git a/docs/en/sql-reference/statements/alter/constraint.md b/docs/en/sql-reference/statements/alter/constraint.md index 29675f704b5..54c456f9aa2 100644 --- a/docs/en/sql-reference/statements/alter/constraint.md +++ b/docs/en/sql-reference/statements/alter/constraint.md @@ -9,8 +9,8 @@ sidebar_label: CONSTRAINT
 
 Constraints could be added or deleted using following syntax:
 
 ``` sql
-ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT constraint_name CHECK expression;
-ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT constraint_name;
+ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT [IF NOT EXISTS] constraint_name CHECK expression;
+ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT [IF EXISTS] constraint_name;
 ```
 
 See more on [constraints](../../../sql-reference/statements/create/table.md#constraints).
diff --git a/docs/en/sql-reference/statements/alter/named-collection.md b/docs/en/sql-reference/statements/alter/named-collection.md index 71d4bfadd9c..ab772fe4dcf 100644 --- a/docs/en/sql-reference/statements/alter/named-collection.md +++ b/docs/en/sql-reference/statements/alter/named-collection.md @@ -3,6 +3,10 @@ slug: /en/sql-reference/statements/alter/named-collection sidebar_label: NAMED COLLECTION ---
 
+import CloudNotSupportedBadge from '@theme/badges/CloudNotSupportedBadge';
+
+<CloudNotSupportedBadge />
+
 # ALTER NAMED COLLECTION
 
 This query intends to modify already existing named collections.
diff --git a/docs/en/sql-reference/statements/alter/view.md b/docs/en/sql-reference/statements/alter/view.md index fb7a5bd7c03..5f3dae0a9c0 100644 --- a/docs/en/sql-reference/statements/alter/view.md +++ b/docs/en/sql-reference/statements/alter/view.md @@ -134,8 +134,8 @@ PRIMARY KEY (event_type, ts)
 ORDER BY (event_type, ts, browser)
 SETTINGS index_granularity = 8192
 
--- !!! The columns' definition is unchanged but it does not matter, we are not quering
--- MATERIALIZED VIEW, we are quering TO (storage) table.
+-- !!! The columns' definition is unchanged but it does not matter, we are not querying
+-- MATERIALIZED VIEW, we are querying TO (storage) table.
 -- SELECT section is updated.
SHOW CREATE TABLE mv FORMAT TSVRaw;
diff --git a/docs/en/sql-reference/statements/create/named-collection.md b/docs/en/sql-reference/statements/create/named-collection.md index f69fa2e3678..a4e146c814c 100644 --- a/docs/en/sql-reference/statements/create/named-collection.md +++ b/docs/en/sql-reference/statements/create/named-collection.md @@ -3,6 +3,10 @@ slug: /en/sql-reference/statements/create/named-collection sidebar_label: NAMED COLLECTION ---
 
+import CloudNotSupportedBadge from '@theme/badges/CloudNotSupportedBadge';
+
+<CloudNotSupportedBadge />
+
 # CREATE NAMED COLLECTION
 
 Creates a new named collection.
diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 32ebc6d028f..3a8afd10359 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -36,7 +36,7 @@ Finds non-negative derivative for given `metric_column` by `timestamp_column`.
 `INTERVAL` can be omitted, default is `INTERVAL 1 SECOND`.
 The computed value is the following for each row:
 - `0` for 1st row,
-- ${metric_i - metric_{i-1} \over timestamp_i - timestamp_{i-1}} * interval$ for $i_th$ row.
+- ${\text{metric}_i - \text{metric}_{i-1} \over \text{timestamp}_i - \text{timestamp}_{i-1}} * \text{interval}$ for the $i$-th row.
 
 ## Syntax
diff --git a/docs/ru/development/developer-instruction.md b/docs/ru/development/developer-instruction.md index 01ff4dd5f28..bf42edf89ff 100644 --- a/docs/ru/development/developer-instruction.md +++ b/docs/ru/development/developer-instruction.md @@ -283,7 +283,7 @@ Pull request можно создать, даже если работа над з
 
 Тесты будут запущены, как только сотрудники ClickHouse поставят для pull request тег «Can be tested». Результаты первых проверок (стиль кода) появятся уже через несколько минут. Результаты сборки появятся примерно через пол часа. Результаты основного набора тестов будут доступны в пределах часа.
 
-Система подготовит сборки ClickHouse специально для вашего pull request. Для их получения, нажмите на ссылку «Details» у проверки «Clickhouse build check». Там вы сможете найти прямые ссылки на собранные .deb пакеты ClickHouse, которые, при желании, вы даже сможете установить на свои продакшен серверы (если не страшно).
+Система подготовит сборки ClickHouse специально для вашего pull request. Для их получения, нажмите на ссылку «Details» у проверки «Builds». Там вы сможете найти прямые ссылки на собранные .deb пакеты ClickHouse, которые, при желании, вы даже сможете установить на свои продакшен серверы (если не страшно).
 
 Вероятнее всего, часть сборок не будет успешной с первого раза. Ведь мы проверяем сборку кода и gcc и clang, а при сборке с помощью clang включаются почти все существующие в природе warnings (всегда с флагом `-Werror`). На той же странице, вы сможете найти логи сборки - вам не обязательно самому собирать ClickHouse всеми возможными способами.
diff --git a/docs/ru/interfaces/cli.md b/docs/ru/interfaces/cli.md index 4d19cf50ae1..86eeaac2da7 100644 --- a/docs/ru/interfaces/cli.md +++ b/docs/ru/interfaces/cli.md @@ -141,6 +141,7 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe
 - `--secure` — если указано, будет использован безопасный канал.
 - `--history_file` - путь к файлу с историей команд.
 - `--param_` — значение параметра для [запроса с параметрами](#cli-queries-with-parameters).
+- `--jwt` – авторизация с использованием JSON Web Token. Доступно только в ClickHouse Cloud.
Вместо параметров `--host`, `--port`, `--user` и `--password` клиент ClickHouse также поддерживает строки подключения (смотри следующий раздел). diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index fa76e84f130..2436581fc7f 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -538,7 +538,7 @@ SELECT base58Decode('3dc8KtHrwM'); Синоним: `TO_BASE64`. -## base64UrlEncode(s) +## base64URLEncode(s) Производит кодирование URL (String или FixedString) в base64-представление в соответствии с [RFC 4648](https://tools.ietf.org/html/rfc4648). @@ -548,7 +548,7 @@ SELECT base58Decode('3dc8KtHrwM'); Синоним: `FROM_BASE64`. -## base64UrlDecode(s) +## base64URLDecode(s) Декодирует base64-представление URL в исходную строку в соответствии с [RFC 4648](https://tools.ietf.org/html/rfc4648). При невозможности декодирования выбрасывает исключение @@ -556,9 +556,9 @@ SELECT base58Decode('3dc8KtHrwM'); Функционал аналогичен base64Decode, но при невозможности декодирования возвращает пустую строку. -## tryBase64UrlDecode(s) +## tryBase64URLDecode(s) -Функционал аналогичен base64UrlDecode, но при невозможности декодирования возвращает пустую строку. +Функционал аналогичен base64URLDecode, но при невозможности декодирования возвращает пустую строку. ## endsWith(s, suffix) {#endswith} diff --git a/docs/ru/sql-reference/statements/alter/constraint.md b/docs/ru/sql-reference/statements/alter/constraint.md index ad5f23e5fdc..45b0f5f6350 100644 --- a/docs/ru/sql-reference/statements/alter/constraint.md +++ b/docs/ru/sql-reference/statements/alter/constraint.md @@ -11,8 +11,8 @@ sidebar_label: "Манипуляции с ограничениями" Добавить или удалить ограничение можно с помощью запросов ``` sql -ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT constraint_name CHECK expression; -ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT constraint_name; +ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT [IF NOT EXISTS] constraint_name CHECK expression; +ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT [IF EXISTS] constraint_name; ``` Запросы выполняют добавление или удаление метаданных об ограничениях таблицы `[db].name`, поэтому выполняются мгновенно. 
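Both the English and Russian constraint pages now document the idempotent forms; a hypothetical usage sketch follows, with the table and constraint names invented for illustration:

```sql
-- Re-running ADD CONSTRAINT IF NOT EXISTS is a no-op instead of an error.
ALTER TABLE events ADD CONSTRAINT IF NOT EXISTS c_amount_positive CHECK amount > 0;
ALTER TABLE events ADD CONSTRAINT IF NOT EXISTS c_amount_positive CHECK amount > 0;
-- Dropping an absent constraint is tolerated as well.
ALTER TABLE events DROP CONSTRAINT IF EXISTS c_amount_positive;
```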
diff --git a/docs/zh/engines/table-engines/mergetree-family/mergetree.md b/docs/zh/engines/table-engines/mergetree-family/mergetree.md index 67bd681269b..d5ece5b23a9 100644 --- a/docs/zh/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/mergetree.md @@ -201,18 +201,18 @@ ClickHouse 不要求主键唯一,所以您可以插入多条具有相同主键 主键中列的数量并没有明确的限制。依据数据结构,您可以在主键包含多些或少些列。这样可以: - - 改善索引的性能。 +- 改善索引的性能。 - - 如果当前主键是 `(a, b)` ,在下列情况下添加另一个 `c` 列会提升性能: + 如果当前主键是 `(a, b)` ,在下列情况下添加另一个 `c` 列会提升性能: - - 查询会使用 `c` 列作为条件 - - 很长的数据范围( `index_granularity` 的数倍)里 `(a, b)` 都是相同的值,并且这样的情况很普遍。换言之,就是加入另一列后,可以让您的查询略过很长的数据范围。 + - 查询会使用 `c` 列作为条件 + - 很长的数据范围( `index_granularity` 的数倍)里 `(a, b)` 都是相同的值,并且这样的情况很普遍。换言之,就是加入另一列后,可以让您的查询略过很长的数据范围。 - - 改善数据压缩。 +- 改善数据压缩。 - ClickHouse 以主键排序片段数据,所以,数据的一致性越高,压缩越好。 + ClickHouse 以主键排序片段数据,所以,数据的一致性越高,压缩越好。 - - 在[CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) 和 [SummingMergeTree](summingmergetree.md) 引擎里进行数据合并时会提供额外的处理逻辑。 +- 在[CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) 和 [SummingMergeTree](summingmergetree.md) 引擎里进行数据合并时会提供额外的处理逻辑。 在这种情况下,指定与主键不同的 *排序键* 也是有意义的。 diff --git a/docs/zh/sql-reference/statements/alter/constraint.md b/docs/zh/sql-reference/statements/alter/constraint.md index 86ffcf09d65..59edcf10645 100644 --- a/docs/zh/sql-reference/statements/alter/constraint.md +++ b/docs/zh/sql-reference/statements/alter/constraint.md @@ -9,8 +9,8 @@ sidebar_label: 约束 约束可以使用以下语法添加或删除: ``` sql -ALTER TABLE [db].name ADD CONSTRAINT constraint_name CHECK expression; -ALTER TABLE [db].name DROP CONSTRAINT constraint_name; +ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT [IF NOT EXISTS] constraint_name CHECK expression; +ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT [IF EXISTS] constraint_name; ``` 查看[constraints](../../../sql-reference/statements/create/table.mdx#constraints)。 diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index efe23d57478..6343dc85d00 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -64,6 +64,7 @@ namespace ErrorCodes extern const int NETWORK_ERROR; extern const int AUTHENTICATION_FAILED; extern const int NO_ELEMENTS_IN_CONFIG; + extern const int USER_EXPIRED; } @@ -74,6 +75,12 @@ void Client::processError(const String & query) const fmt::print(stderr, "Received exception from server (version {}):\n{}\n", server_version, getExceptionMessage(*server_exception, print_stack_trace, true)); + + if (server_exception->code() == ErrorCodes::USER_EXPIRED) + { + server_exception->rethrow(); + } + if (is_interactive) { fmt::print(stderr, "\n"); @@ -241,6 +248,10 @@ std::vector Client::loadWarningMessages() } } +Poco::Util::LayeredConfiguration & Client::getClientConfiguration() +{ + return config(); +} void Client::initialize(Poco::Util::Application & self) { @@ -690,9 +701,7 @@ bool Client::processWithFuzzing(const String & full_query) const char * begin = full_query.data(); orig_ast = parseQuery(begin, begin + full_query.size(), global_context->getSettingsRef(), - /*allow_multi_statements=*/ true, - /*is_interactive=*/ is_interactive, - /*ignore_error=*/ ignore_error); + /*allow_multi_statements=*/ true); } catch (const Exception & e) { @@ -944,6 +953,7 @@ void Client::addOptions(OptionsDescription & options_description) ("ssh-key-file", po::value(), "File containing the SSH private key for authenticate with the server.") ("ssh-key-passphrase", po::value(), "Passphrase for the SSH private key specified by 
--ssh-key-file.") ("quota_key", po::value(), "A string to differentiate quotas when the user have keyed quotas configured on server") + ("jwt", po::value(), "Use JWT for authentication") ("max_client_network_bandwidth", po::value(), "the maximum speed of data exchange over the network for the client in bytes per second.") ("compression", po::value(), "enable or disable compression (enabled by default for remote communication and disabled for localhost communication).") @@ -1102,6 +1112,12 @@ void Client::processOptions(const OptionsDescription & options_description, config().setBool("no-warnings", true); if (options.count("fake-drop")) config().setString("ignore_drop_queries_probability", "1"); + if (options.count("jwt")) + { + if (!options["user"].defaulted()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "User and JWT flags can't be specified together"); + config().setString("jwt", options["jwt"].as()); + } if (options.count("accept-invalid-certificate")) { config().setString("openSSL.client.invalidCertificateHandler.name", "AcceptCertificateHandler"); diff --git a/programs/client/Client.h b/programs/client/Client.h index bef948b3c1e..229608f787d 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -16,6 +16,9 @@ public: int main(const std::vector & /*args*/) override; protected: + + Poco::Util::LayeredConfiguration & getClientConfiguration() override; + bool processWithFuzzing(const String & full_query) override; std::optional processFuzzingStep(const String & query_to_execute, const ASTPtr & parsed_query); diff --git a/programs/keeper-client/KeeperClient.cpp b/programs/keeper-client/KeeperClient.cpp index ebec337060c..a20c1f686f3 100644 --- a/programs/keeper-client/KeeperClient.cpp +++ b/programs/keeper-client/KeeperClient.cpp @@ -368,7 +368,7 @@ int KeeperClient::main(const std::vector & /* args */) return 0; } - DB::ConfigProcessor config_processor(config().getString("config-file", "config.xml")); + ConfigProcessor config_processor(config().getString("config-file", "config.xml")); /// This will handle a situation when clickhouse is running on the embedded config, but config.d folder is also present. ConfigProcessor::registerEmbeddedConfig("config.xml", ""); @@ -383,6 +383,9 @@ int KeeperClient::main(const std::vector & /* args */) for (const auto & key : keys) { + if (key != "node") + continue; + String prefix = "zookeeper." 
+ key; String host = clickhouse_config.configuration->getString(prefix + ".host"); String port = clickhouse_config.configuration->getString(prefix + ".port"); @@ -401,6 +404,7 @@ int KeeperClient::main(const std::vector & /* args */) zk_args.hosts.push_back(host + ":" + port); } + zk_args.availability_zones.resize(zk_args.hosts.size()); zk_args.connection_timeout_ms = config().getInt("connection-timeout", 10) * 1000; zk_args.session_timeout_ms = config().getInt("session-timeout", 10) * 1000; zk_args.operation_timeout_ms = config().getInt("operation-timeout", 10) * 1000; diff --git a/programs/keeper-client/Parser.cpp b/programs/keeper-client/Parser.cpp index 5b16e6d2c23..51f85cf4a69 100644 --- a/programs/keeper-client/Parser.cpp +++ b/programs/keeper-client/Parser.cpp @@ -12,8 +12,7 @@ bool parseKeeperArg(IParser::Pos & pos, Expected & expected, String & result) if (!parseIdentifierOrStringLiteral(pos, expected, result)) return false; } - - while (pos->type != TokenType::Whitespace && pos->type != TokenType::EndOfStream && pos->type != TokenType::Semicolon) + else if (pos->type == TokenType::Number) { result.append(pos->begin, pos->end); ++pos; @@ -40,8 +39,8 @@ bool KeeperParser::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) for (const auto & pair : KeeperClient::commands) expected.add(pos, pair.first.data()); - for (const auto & flwc : four_letter_word_commands) - expected.add(pos, flwc.data()); + for (const auto & four_letter_word_command : four_letter_word_commands) + expected.add(pos, four_letter_word_command.data()); if (pos->type != TokenType::BareWord) return false; diff --git a/programs/keeper-client/Parser.h b/programs/keeper-client/Parser.h index 57ee6ce4a18..503edfa4f73 100644 --- a/programs/keeper-client/Parser.h +++ b/programs/keeper-client/Parser.h @@ -11,7 +11,6 @@ namespace DB { bool parseKeeperArg(IParser::Pos & pos, Expected & expected, String & result); - bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path); diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 0d3c1f10894..f14ef2e5552 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -355,10 +355,7 @@ try std::string include_from_path = config().getString("include_from", "/etc/metrika.xml"); - if (config().has(DB::PlacementInfo::PLACEMENT_CONFIG_PREFIX)) - { - PlacementInfo::PlacementInfo::instance().initialize(config()); - } + PlacementInfo::PlacementInfo::instance().initialize(config()); GlobalThreadPool::initialize( /// We need to have sufficient amount of threads for connections + nuraft workers + keeper workers, 1000 is an estimation @@ -577,8 +574,7 @@ try #if USE_SSL CertificateReloader::instance().tryLoad(*config); #endif - }, - /* already_loaded = */ false); /// Reload it right now (initial loading) + }); SCOPE_EXIT({ LOG_INFO(log, "Shutting down."); diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index 2fca10ce4d7..86410d712ec 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -11,7 +11,6 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES LibraryBridgeHandlers.cpp SharedLibrary.cpp library-bridge.cpp - createFunctionBaseCast.cpp ) clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES}) @@ -20,6 +19,7 @@ target_link_libraries(clickhouse-library-bridge PRIVATE daemon dbms bridge + clickhouse_functions ) set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) 
diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 4d5cfb09e6a..b33e1595056 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -59,8 +60,13 @@ # include #endif + namespace fs = std::filesystem; +namespace CurrentMetrics +{ + extern const Metric MemoryTracking; +} namespace DB { @@ -82,6 +88,11 @@ void applySettingsOverridesForLocal(ContextMutablePtr context) context->setSettings(settings); } +Poco::Util::LayeredConfiguration & LocalServer::getClientConfiguration() +{ + return config(); +} + void LocalServer::processError(const String &) const { if (ignore_error) @@ -117,20 +128,21 @@ void LocalServer::initialize(Poco::Util::Application & self) Poco::Util::Application::initialize(self); /// Load config files if exists - if (config().has("config-file") || fs::exists("config.xml")) + if (getClientConfiguration().has("config-file") || fs::exists("config.xml")) { - const auto config_path = config().getString("config-file", "config.xml"); + const auto config_path = getClientConfiguration().getString("config-file", "config.xml"); ConfigProcessor config_processor(config_path, false, true); ConfigProcessor::setConfigPath(fs::path(config_path).parent_path()); auto loaded_config = config_processor.loadConfig(); - config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false); + getClientConfiguration().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false); } + server_settings.loadSettingsFromConfig(config()); + GlobalThreadPool::initialize( - config().getUInt("max_thread_pool_size", 10000), - config().getUInt("max_thread_pool_free_size", 1000), - config().getUInt("thread_pool_queue_size", 10000) - ); + server_settings.max_thread_pool_size, + server_settings.max_thread_pool_free_size, + server_settings.thread_pool_queue_size); #if USE_AZURE_BLOB_STORAGE /// See the explanation near the same line in Server.cpp @@ -141,18 +153,17 @@ void LocalServer::initialize(Poco::Util::Application & self) #endif getIOThreadPool().initialize( - config().getUInt("max_io_thread_pool_size", 100), - config().getUInt("max_io_thread_pool_free_size", 0), - config().getUInt("io_thread_pool_queue_size", 10000)); + server_settings.max_io_thread_pool_size, + server_settings.max_io_thread_pool_free_size, + server_settings.io_thread_pool_queue_size); - - const size_t active_parts_loading_threads = config().getUInt("max_active_parts_loading_thread_pool_size", 64); + const size_t active_parts_loading_threads = server_settings.max_active_parts_loading_thread_pool_size; getActivePartsLoadingThreadPool().initialize( active_parts_loading_threads, 0, // We don't need any threads one all the parts will be loaded active_parts_loading_threads); - const size_t outdated_parts_loading_threads = config().getUInt("max_outdated_parts_loading_thread_pool_size", 32); + const size_t outdated_parts_loading_threads = server_settings.max_outdated_parts_loading_thread_pool_size; getOutdatedPartsLoadingThreadPool().initialize( outdated_parts_loading_threads, 0, // We don't need any threads one all the parts will be loaded @@ -160,7 +171,7 @@ void LocalServer::initialize(Poco::Util::Application & self) getOutdatedPartsLoadingThreadPool().setMaxTurboThreads(active_parts_loading_threads); - const size_t unexpected_parts_loading_threads = config().getUInt("max_unexpected_parts_loading_thread_pool_size", 32); + const size_t unexpected_parts_loading_threads = 
server_settings.max_unexpected_parts_loading_thread_pool_size; getUnexpectedPartsLoadingThreadPool().initialize( unexpected_parts_loading_threads, 0, // We don't need any threads one all the parts will be loaded @@ -168,7 +179,7 @@ void LocalServer::initialize(Poco::Util::Application & self) getUnexpectedPartsLoadingThreadPool().setMaxTurboThreads(active_parts_loading_threads); - const size_t cleanup_threads = config().getUInt("max_parts_cleaning_thread_pool_size", 128); + const size_t cleanup_threads = server_settings.max_parts_cleaning_thread_pool_size; getPartsCleaningThreadPool().initialize( cleanup_threads, 0, // We don't need any threads one all the parts will be deleted @@ -201,10 +212,10 @@ void LocalServer::tryInitPath() { std::string path; - if (config().has("path")) + if (getClientConfiguration().has("path")) { // User-supplied path. - path = config().getString("path"); + path = getClientConfiguration().getString("path"); Poco::trimInPlace(path); if (path.empty()) @@ -263,13 +274,13 @@ void LocalServer::tryInitPath() global_context->setUserFilesPath(""); /// user's files are everywhere - std::string user_scripts_path = config().getString("user_scripts_path", fs::path(path) / "user_scripts/"); + std::string user_scripts_path = getClientConfiguration().getString("user_scripts_path", fs::path(path) / "user_scripts/"); global_context->setUserScriptsPath(user_scripts_path); /// top_level_domains_lists - const std::string & top_level_domains_path = config().getString("top_level_domains_path", fs::path(path) / "top_level_domains/"); + const std::string & top_level_domains_path = getClientConfiguration().getString("top_level_domains_path", fs::path(path) / "top_level_domains/"); if (!top_level_domains_path.empty()) - TLDListsHolder::getInstance().parseConfig(fs::path(top_level_domains_path) / "", config()); + TLDListsHolder::getInstance().parseConfig(fs::path(top_level_domains_path) / "", getClientConfiguration()); } @@ -311,14 +322,14 @@ void LocalServer::cleanup() std::string LocalServer::getInitialCreateTableQuery() { - if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format") && (!isRegularFile(STDIN_FILENO) || queries.empty())) + if (!getClientConfiguration().has("table-structure") && !getClientConfiguration().has("table-file") && !getClientConfiguration().has("table-data-format") && (!isRegularFile(STDIN_FILENO) || queries.empty())) return {}; - auto table_name = backQuoteIfNeed(config().getString("table-name", "table")); - auto table_structure = config().getString("table-structure", "auto"); + auto table_name = backQuoteIfNeed(getClientConfiguration().getString("table-name", "table")); + auto table_structure = getClientConfiguration().getString("table-structure", "auto"); String table_file; - if (!config().has("table-file") || config().getString("table-file") == "-") + if (!getClientConfiguration().has("table-file") || getClientConfiguration().getString("table-file") == "-") { /// Use Unix tools stdin naming convention table_file = "stdin"; @@ -326,7 +337,7 @@ std::string LocalServer::getInitialCreateTableQuery() else { /// Use regular file - auto file_name = config().getString("table-file"); + auto file_name = getClientConfiguration().getString("table-file"); table_file = quoteString(file_name); } @@ -374,18 +385,18 @@ void LocalServer::setupUsers() ConfigurationPtr users_config; auto & access_control = global_context->getAccessControl(); - access_control.setNoPasswordAllowed(config().getBool("allow_no_password", true)); - 
access_control.setPlaintextPasswordAllowed(config().getBool("allow_plaintext_password", true)); - if (config().has("config-file") || fs::exists("config.xml")) + access_control.setNoPasswordAllowed(getClientConfiguration().getBool("allow_no_password", true)); + access_control.setPlaintextPasswordAllowed(getClientConfiguration().getBool("allow_plaintext_password", true)); + if (getClientConfiguration().has("config-file") || fs::exists("config.xml")) { - String config_path = config().getString("config-file", ""); - bool has_user_directories = config().has("user_directories"); + String config_path = getClientConfiguration().getString("config-file", ""); + bool has_user_directories = getClientConfiguration().has("user_directories"); const auto config_dir = fs::path{config_path}.remove_filename().string(); - String users_config_path = config().getString("users_config", ""); + String users_config_path = getClientConfiguration().getString("users_config", ""); if (users_config_path.empty() && has_user_directories) { - users_config_path = config().getString("user_directories.users_xml.path"); + users_config_path = getClientConfiguration().getString("user_directories.users_xml.path"); if (fs::path(users_config_path).is_relative() && fs::exists(fs::path(config_dir) / users_config_path)) users_config_path = fs::path(config_dir) / users_config_path; } @@ -409,10 +420,10 @@ void LocalServer::setupUsers() void LocalServer::connect() { - connection_parameters = ConnectionParameters(config(), "localhost"); + connection_parameters = ConnectionParameters(getClientConfiguration(), "localhost"); ReadBuffer * in; - auto table_file = config().getString("table-file", "-"); + auto table_file = getClientConfiguration().getString("table-file", "-"); if (table_file == "-" || table_file == "stdin") { in = &std_in; @@ -433,7 +444,7 @@ try UseSSL use_ssl; thread_status.emplace(); - StackTrace::setShowAddresses(config().getBool("show_addresses_in_stack_traces", true)); + StackTrace::setShowAddresses(server_settings.show_addresses_in_stack_traces); setupSignalHandler(); @@ -448,7 +459,7 @@ try if (rlim.rlim_cur < rlim.rlim_max) { - rlim.rlim_cur = config().getUInt("max_open_files", static_cast(rlim.rlim_max)); + rlim.rlim_cur = getClientConfiguration().getUInt("max_open_files", static_cast(rlim.rlim_max)); int rc = setrlimit(RLIMIT_NOFILE, &rlim); if (rc != 0) std::cerr << fmt::format("Cannot set max number of file descriptors to {}. Try to specify max_open_files according to your system limits. 
error: {}", rlim.rlim_cur, errnoToString()) << '\n'; @@ -456,8 +467,8 @@ try } is_interactive = stdin_is_a_tty - && (config().hasOption("interactive") - || (queries.empty() && !config().has("table-structure") && queries_files.empty() && !config().has("table-file"))); + && (getClientConfiguration().hasOption("interactive") + || (queries.empty() && !getClientConfiguration().has("table-structure") && queries_files.empty() && !getClientConfiguration().has("table-file"))); if (!is_interactive) { @@ -481,7 +492,7 @@ try SCOPE_EXIT({ cleanup(); }); - initTTYBuffer(toProgressOption(config().getString("progress", "default"))); + initTTYBuffer(toProgressOption(getClientConfiguration().getString("progress", "default"))); ASTAlterCommand::setFormatAlterCommandsWithParentheses(true); applyCmdSettings(global_context); @@ -489,7 +500,7 @@ try /// try to load user defined executable functions, throw on error and die try { - global_context->loadOrReloadUserDefinedExecutableFunctions(config()); + global_context->loadOrReloadUserDefinedExecutableFunctions(getClientConfiguration()); } catch (...) { @@ -530,7 +541,7 @@ try } catch (const DB::Exception & e) { - bool need_print_stack_trace = config().getBool("stacktrace", false); + bool need_print_stack_trace = getClientConfiguration().getBool("stacktrace", false); std::cerr << getExceptionMessage(e, need_print_stack_trace, true) << std::endl; return e.code() ? e.code() : -1; } @@ -542,42 +553,42 @@ catch (...) void LocalServer::updateLoggerLevel(const String & logs_level) { - config().setString("logger.level", logs_level); - updateLevels(config(), logger()); + getClientConfiguration().setString("logger.level", logs_level); + updateLevels(getClientConfiguration(), logger()); } void LocalServer::processConfig() { - if (!queries.empty() && config().has("queries-file")) + if (!queries.empty() && getClientConfiguration().has("queries-file")) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--query' and '--queries-file' cannot be specified at the same time"); - if (config().has("multiquery")) + if (getClientConfiguration().has("multiquery")) is_multiquery = true; - pager = config().getString("pager", ""); + pager = getClientConfiguration().getString("pager", ""); - delayed_interactive = config().has("interactive") && (!queries.empty() || config().has("queries-file")); + delayed_interactive = getClientConfiguration().has("interactive") && (!queries.empty() || getClientConfiguration().has("queries-file")); if (!is_interactive || delayed_interactive) { - echo_queries = config().hasOption("echo") || config().hasOption("verbose"); - ignore_error = config().getBool("ignore-error", false); + echo_queries = getClientConfiguration().hasOption("echo") || getClientConfiguration().hasOption("verbose"); + ignore_error = getClientConfiguration().getBool("ignore-error", false); } - print_stack_trace = config().getBool("stacktrace", false); + print_stack_trace = getClientConfiguration().getBool("stacktrace", false); const std::string clickhouse_dialect{"clickhouse"}; - load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false) - && config().getString("dialect", clickhouse_dialect) == clickhouse_dialect; - wait_for_suggestions_to_load = config().getBool("wait_for_suggestions_to_load", false); + load_suggestions = (is_interactive || delayed_interactive) && !getClientConfiguration().getBool("disable_suggestion", false) + && getClientConfiguration().getString("dialect", clickhouse_dialect) == clickhouse_dialect; + 
wait_for_suggestions_to_load = getClientConfiguration().getBool("wait_for_suggestions_to_load", false); - auto logging = (config().has("logger.console") - || config().has("logger.level") - || config().has("log-level") - || config().has("send_logs_level") - || config().has("logger.log")); + auto logging = (getClientConfiguration().has("logger.console") + || getClientConfiguration().has("logger.level") + || getClientConfiguration().has("log-level") + || getClientConfiguration().has("send_logs_level") + || getClientConfiguration().has("logger.log")); - auto level = config().getString("log-level", "trace"); + auto level = getClientConfiguration().getString("log-level", "trace"); - if (config().has("server_logs_file")) + if (getClientConfiguration().has("server_logs_file")) { auto poco_logs_level = Poco::Logger::parseLevel(level); Poco::Logger::root().setLevel(poco_logs_level); @@ -587,10 +598,10 @@ void LocalServer::processConfig() } else { - config().setString("logger", "logger"); + getClientConfiguration().setString("logger", "logger"); auto log_level_default = logging ? level : "fatal"; - config().setString("logger.level", config().getString("log-level", config().getString("send_logs_level", log_level_default))); - buildLoggers(config(), logger(), "clickhouse-local"); + getClientConfiguration().setString("logger.level", getClientConfiguration().getString("log-level", getClientConfiguration().getString("send_logs_level", log_level_default))); + buildLoggers(getClientConfiguration(), logger(), "clickhouse-local"); } shared_context = Context::createShared(); @@ -604,13 +615,13 @@ void LocalServer::processConfig() LoggerRawPtr log = &logger(); /// Maybe useless - if (config().has("macros")) - global_context->setMacros(std::make_unique(config(), "macros", log)); + if (getClientConfiguration().has("macros")) + global_context->setMacros(std::make_unique(getClientConfiguration(), "macros", log)); setDefaultFormatsAndCompressionFromConfiguration(); /// Sets external authenticators config (LDAP, Kerberos). - global_context->setExternalAuthenticatorsConfig(config()); + global_context->setExternalAuthenticatorsConfig(getClientConfiguration()); setupUsers(); @@ -619,12 +630,43 @@ void LocalServer::processConfig() global_context->getProcessList().setMaxSize(0); const size_t physical_server_memory = getMemoryAmount(); - const double cache_size_to_ram_max_ratio = config().getDouble("cache_size_to_ram_max_ratio", 0.5); + + size_t max_server_memory_usage = server_settings.max_server_memory_usage; + double max_server_memory_usage_to_ram_ratio = server_settings.max_server_memory_usage_to_ram_ratio; + + size_t default_max_server_memory_usage = static_cast(physical_server_memory * max_server_memory_usage_to_ram_ratio); + + if (max_server_memory_usage == 0) + { + max_server_memory_usage = default_max_server_memory_usage; + LOG_INFO(log, "Setting max_server_memory_usage was set to {}" + " ({} available * {:.2f} max_server_memory_usage_to_ram_ratio)", + formatReadableSizeWithBinarySuffix(max_server_memory_usage), + formatReadableSizeWithBinarySuffix(physical_server_memory), + max_server_memory_usage_to_ram_ratio); + } + else if (max_server_memory_usage > default_max_server_memory_usage) + { + max_server_memory_usage = default_max_server_memory_usage; + LOG_INFO(log, "Setting max_server_memory_usage was lowered to {}" + " because the system has low amount of memory. 
The amount was" + " calculated as {} available" + " * {:.2f} max_server_memory_usage_to_ram_ratio", + formatReadableSizeWithBinarySuffix(max_server_memory_usage), + formatReadableSizeWithBinarySuffix(physical_server_memory), + max_server_memory_usage_to_ram_ratio); + } + + total_memory_tracker.setHardLimit(max_server_memory_usage); + total_memory_tracker.setDescription("(total)"); + total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking); + + const double cache_size_to_ram_max_ratio = server_settings.cache_size_to_ram_max_ratio; const size_t max_cache_size = static_cast(physical_server_memory * cache_size_to_ram_max_ratio); - String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", DEFAULT_UNCOMPRESSED_CACHE_POLICY); - size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE); - double uncompressed_cache_size_ratio = config().getDouble("uncompressed_cache_size_ratio", DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO); + String uncompressed_cache_policy = server_settings.uncompressed_cache_policy; + size_t uncompressed_cache_size = server_settings.uncompressed_cache_size; + double uncompressed_cache_size_ratio = server_settings.uncompressed_cache_size_ratio; if (uncompressed_cache_size > max_cache_size) { uncompressed_cache_size = max_cache_size; @@ -632,9 +674,9 @@ void LocalServer::processConfig() } global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size, uncompressed_cache_size_ratio); - String mark_cache_policy = config().getString("mark_cache_policy", DEFAULT_MARK_CACHE_POLICY); - size_t mark_cache_size = config().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE); - double mark_cache_size_ratio = config().getDouble("mark_cache_size_ratio", DEFAULT_MARK_CACHE_SIZE_RATIO); + String mark_cache_policy = server_settings.mark_cache_policy; + size_t mark_cache_size = server_settings.mark_cache_size; + double mark_cache_size_ratio = server_settings.mark_cache_size_ratio; if (!mark_cache_size) LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation."); if (mark_cache_size > max_cache_size) @@ -644,9 +686,9 @@ void LocalServer::processConfig() } global_context->setMarkCache(mark_cache_policy, mark_cache_size, mark_cache_size_ratio); - String index_uncompressed_cache_policy = config().getString("index_uncompressed_cache_policy", DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY); - size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE); - double index_uncompressed_cache_size_ratio = config().getDouble("index_uncompressed_cache_size_ratio", DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO); + String index_uncompressed_cache_policy = server_settings.index_uncompressed_cache_policy; + size_t index_uncompressed_cache_size = server_settings.index_uncompressed_cache_size; + double index_uncompressed_cache_size_ratio = server_settings.index_uncompressed_cache_size_ratio; if (index_uncompressed_cache_size > max_cache_size) { index_uncompressed_cache_size = max_cache_size; @@ -654,9 +696,9 @@ void LocalServer::processConfig() } global_context->setIndexUncompressedCache(index_uncompressed_cache_policy, index_uncompressed_cache_size, index_uncompressed_cache_size_ratio); - String index_mark_cache_policy = config().getString("index_mark_cache_policy", DEFAULT_INDEX_MARK_CACHE_POLICY); - size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE); - 
double index_mark_cache_size_ratio = config().getDouble("index_mark_cache_size_ratio", DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO); + String index_mark_cache_policy = server_settings.index_mark_cache_policy; + size_t index_mark_cache_size = server_settings.index_mark_cache_size; + double index_mark_cache_size_ratio = server_settings.index_mark_cache_size_ratio; if (index_mark_cache_size > max_cache_size) { index_mark_cache_size = max_cache_size; @@ -664,7 +706,7 @@ void LocalServer::processConfig() } global_context->setIndexMarkCache(index_mark_cache_policy, index_mark_cache_size, index_mark_cache_size_ratio); - size_t mmap_cache_size = config().getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE); + size_t mmap_cache_size = server_settings.mmap_cache_size; if (mmap_cache_size > max_cache_size) { mmap_cache_size = max_cache_size; @@ -676,8 +718,8 @@ void LocalServer::processConfig() global_context->setQueryCache(0, 0, 0, 0); #if USE_EMBEDDED_COMPILER - size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE); - size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES); + size_t compiled_expression_cache_max_size_in_bytes = server_settings.compiled_expression_cache_size; + size_t compiled_expression_cache_max_elements = server_settings.compiled_expression_cache_elements_size; CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements); #endif @@ -689,16 +731,16 @@ void LocalServer::processConfig() applyCmdOptions(global_context); /// Load global settings from default_profile and system_profile. - global_context->setDefaultProfiles(config()); + global_context->setDefaultProfiles(getClientConfiguration()); /// We load temporary database first, because projections need it. 
DatabaseCatalog::instance().initializeAndLoadTemporaryDatabase(); - std::string default_database = config().getString("default_database", "default"); + std::string default_database = server_settings.default_database; DatabaseCatalog::instance().attachDatabase(default_database, createClickHouseLocalDatabaseOverlay(default_database, global_context)); global_context->setCurrentDatabase(default_database); - if (config().has("path")) + if (getClientConfiguration().has("path")) { String path = global_context->getPath(); fs::create_directories(fs::path(path)); @@ -713,7 +755,7 @@ void LocalServer::processConfig() attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE)); waitLoad(TablesLoaderForegroundPoolId, startup_system_tasks); - if (!config().has("only-system-tables")) + if (!getClientConfiguration().has("only-system-tables")) { DatabaseCatalog::instance().createBackgroundTasks(); waitLoad(loadMetadata(global_context)); @@ -725,18 +767,15 @@ void LocalServer::processConfig() LOG_DEBUG(log, "Loaded metadata."); } - else if (!config().has("no-system-tables")) + else if (!getClientConfiguration().has("no-system-tables")) { attachSystemTablesServer(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE), false); attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA)); attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE)); } - server_display_name = config().getString("display_name", getFQDNOrHostName()); - prompt_by_server_display_name = config().getRawString("prompt_by_server_display_name.default", "{display_name} :) "); - std::map prompt_substitutions{{"display_name", server_display_name}}; - for (const auto & [key, value] : prompt_substitutions) - boost::replace_all(prompt_by_server_display_name, "{" + key + "}", value); + server_display_name = getClientConfiguration().getString("display_name", ""); + prompt_by_server_display_name = getClientConfiguration().getRawString("prompt_by_server_display_name.default", ":) "); global_context->setQueryKindInitial(); global_context->setQueryKind(query_kind); @@ -814,7 +853,7 @@ void LocalServer::applyCmdSettings(ContextMutablePtr context) void LocalServer::applyCmdOptions(ContextMutablePtr context) { - context->setDefaultFormat(config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV"))); + context->setDefaultFormat(getClientConfiguration().getString("output-format", getClientConfiguration().getString("format", is_interactive ? 
"PrettyCompact" : "TSV"))); applyCmdSettings(context); } @@ -822,33 +861,33 @@ void LocalServer::applyCmdOptions(ContextMutablePtr context) void LocalServer::processOptions(const OptionsDescription &, const CommandLineOptions & options, const std::vector &, const std::vector &) { if (options.count("table")) - config().setString("table-name", options["table"].as()); + getClientConfiguration().setString("table-name", options["table"].as()); if (options.count("file")) - config().setString("table-file", options["file"].as()); + getClientConfiguration().setString("table-file", options["file"].as()); if (options.count("structure")) - config().setString("table-structure", options["structure"].as()); + getClientConfiguration().setString("table-structure", options["structure"].as()); if (options.count("no-system-tables")) - config().setBool("no-system-tables", true); + getClientConfiguration().setBool("no-system-tables", true); if (options.count("only-system-tables")) - config().setBool("only-system-tables", true); + getClientConfiguration().setBool("only-system-tables", true); if (options.count("database")) - config().setString("default_database", options["database"].as()); + getClientConfiguration().setString("default_database", options["database"].as()); if (options.count("input-format")) - config().setString("table-data-format", options["input-format"].as()); + getClientConfiguration().setString("table-data-format", options["input-format"].as()); if (options.count("output-format")) - config().setString("output-format", options["output-format"].as()); + getClientConfiguration().setString("output-format", options["output-format"].as()); if (options.count("logger.console")) - config().setBool("logger.console", options["logger.console"].as()); + getClientConfiguration().setBool("logger.console", options["logger.console"].as()); if (options.count("logger.log")) - config().setString("logger.log", options["logger.log"].as()); + getClientConfiguration().setString("logger.log", options["logger.log"].as()); if (options.count("logger.level")) - config().setString("logger.level", options["logger.level"].as()); + getClientConfiguration().setString("logger.level", options["logger.level"].as()); if (options.count("send_logs_level")) - config().setString("send_logs_level", options["send_logs_level"].as()); + getClientConfiguration().setString("send_logs_level", options["send_logs_level"].as()); if (options.count("wait_for_suggestions_to_load")) - config().setBool("wait_for_suggestions_to_load", true); + getClientConfiguration().setBool("wait_for_suggestions_to_load", true); } void LocalServer::readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector &, std::vector &) diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index 4856e68ff9b..da2466650a7 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -30,6 +30,9 @@ public: int main(const std::vector & /*args*/) override; protected: + + Poco::Util::LayeredConfiguration & getClientConfiguration() override; + void connect() override; void processError(const String & query) const override; @@ -63,6 +66,8 @@ private: void applyCmdOptions(ContextMutablePtr context); void applyCmdSettings(ContextMutablePtr context); + ServerSettings server_settings; + std::optional status; std::optional temporary_directory_to_delete; diff --git a/programs/main.cpp b/programs/main.cpp index c270388f17f..61e2bc18ed7 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -13,6 +13,7 @@ #include +#include 
"config.h" #include "config_tools.h" #include @@ -439,6 +440,14 @@ extern "C" } #endif +/// Prevent messages from JeMalloc in the release build. +/// Some of these messages are non-actionable for the users, such as: +/// : Number of CPUs detected is not deterministic. Per-CPU arena disabled. +#if USE_JEMALLOC && defined(NDEBUG) && !defined(SANITIZER) +extern "C" void (*malloc_message)(void *, const char *s); +__attribute__((constructor(0))) void init_je_malloc_message() { malloc_message = [](void *, const char *){}; } +#endif + /// This allows to implement assert to forbid initialization of a class in static constructors. /// Usage: /// diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 83839cc21ac..14af330f788 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -13,7 +13,6 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES getIdentifierQuote.cpp odbc-bridge.cpp validateODBCConnectionString.cpp - createFunctionBaseCast.cpp ) clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) @@ -25,6 +24,7 @@ target_link_libraries(clickhouse-odbc-bridge PRIVATE clickhouse_parsers ch_contrib::nanodbc ch_contrib::unixodbc + clickhouse_functions ) set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 7451a015ff0..4cb3b5f45c7 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1003,6 +1003,8 @@ try ServerUUID::load(path / "uuid", log); + PlacementInfo::PlacementInfo::instance().initialize(config()); + zkutil::validateZooKeeperConfig(config()); bool has_zookeeper = zkutil::hasZooKeeperConfig(config()); @@ -1380,8 +1382,8 @@ try global_context->setQueryCache(query_cache_max_size_in_bytes, query_cache_max_entries, query_cache_query_cache_max_entry_size_in_bytes, query_cache_max_entry_size_in_rows); #if USE_EMBEDDED_COMPILER - size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE); - size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES); + size_t compiled_expression_cache_max_size_in_bytes = server_settings.compiled_expression_cache_size; + size_t compiled_expression_cache_max_elements = server_settings.compiled_expression_cache_elements_size; CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements); #endif @@ -1407,8 +1409,8 @@ try tryLogCurrentException(log, "Disabling cgroup memory observer because of an error during initialization"); } - const std::string cert_path = config().getString("openSSL.server.certificateFile", ""); - const std::string key_path = config().getString("openSSL.server.privateKeyFile", ""); + std::string cert_path = config().getString("openSSL.server.certificateFile", ""); + std::string key_path = config().getString("openSSL.server.privateKeyFile", ""); std::vector extra_paths = {include_from_path}; if (!cert_path.empty()) @@ -1416,6 +1418,18 @@ try if (!key_path.empty()) extra_paths.emplace_back(key_path); + Poco::Util::AbstractConfiguration::Keys protocols; + config().keys("protocols", protocols); + for (const auto & protocol : protocols) + { + cert_path = config().getString("protocols." + protocol + ".certificateFile", ""); + key_path = config().getString("protocols." 
+ protocol + ".privateKeyFile", ""); + if (!cert_path.empty()) + extra_paths.emplace_back(cert_path); + if (!key_path.empty()) + extra_paths.emplace_back(key_path); + } + auto main_config_reloader = std::make_unique( config_path, extra_paths, @@ -1528,6 +1542,8 @@ try global_context->setMaxDictionaryNumToWarn(new_server_settings.max_dictionary_num_to_warn); global_context->setMaxDatabaseNumToWarn(new_server_settings.max_database_num_to_warn); global_context->setMaxPartNumToWarn(new_server_settings.max_part_num_to_warn); + /// Only for system.server_settings + global_context->setConfigReloaderInterval(new_server_settings.config_reload_interval_ms); SlotCount concurrent_threads_soft_limit = UnlimitedSlots; if (new_server_settings.concurrent_threads_soft_limit_num > 0 && new_server_settings.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit) @@ -1617,6 +1633,10 @@ try 0, // We don't need any threads one all the parts will be deleted new_server_settings.max_parts_cleaning_thread_pool_size); + + global_context->setMergeWorkload(new_server_settings.merge_workload); + global_context->setMutationWorkload(new_server_settings.mutation_workload); + if (config->has("resources")) { global_context->getResourceManager()->updateConfiguration(*config); @@ -1652,7 +1672,7 @@ try CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "encryption_codecs"); #if USE_SSL - CertificateReloader::instance().tryLoad(*config); + CertificateReloader::instance().tryReloadAll(*config); #endif NamedCollectionFactory::instance().reloadFromConfig(*config); @@ -1686,8 +1706,7 @@ try /// Must be the last. latest_config = config; - }, - /* already_loaded = */ false); /// Reload it right now (initial loading) + }); const auto listen_hosts = getListenHosts(config()); const auto interserver_listen_hosts = getInterserverListenHosts(config()); @@ -1800,11 +1819,6 @@ try } - if (config().has(DB::PlacementInfo::PLACEMENT_CONFIG_PREFIX)) - { - PlacementInfo::PlacementInfo::instance().initialize(config()); - } - { std::lock_guard lock(servers_lock); /// We should start interserver communications before (and more important shutdown after) tables. diff --git a/programs/server/config.xml b/programs/server/config.xml index b7a4b8dd0e9..94825a55f67 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -29,7 +29,14 @@ --> 1000M 10 + + + + + + + - + true @@ -408,13 +415,11 @@ - 5368709120 + You should not lower this value. --> + - - 5368709120 + + - 1000 + - 134217728 + - 10000 + + + + /var/lib/clickhouse/caches/ @@ -1155,6 +1170,18 @@ false + + + system + error_log
+ 7500 + 1048576 + 8192 + 524288 + 1000 + false +
+ + + + - - - 1073741824 - 1024 - 1048576 - 30000000 - - backups diff --git a/programs/server/config.yaml.example b/programs/server/config.yaml.example index 9fc188e97aa..5d5499f876c 100644 --- a/programs/server/config.yaml.example +++ b/programs/server/config.yaml.example @@ -260,7 +260,10 @@ uncompressed_cache_size: 8589934592 # Approximate size of mark cache, used in tables of MergeTree family. # In bytes. Cache is single for server. Memory is allocated only on demand. # You should not lower this value. -mark_cache_size: 5368709120 +# mark_cache_size: 5368709120 + +# For marks of secondary indices. +# index_mark_cache_size: 5368709120 # If you enable the `min_bytes_to_use_mmap_io` setting, # the data in MergeTree tables can be read with mmap to avoid copying from kernel to userspace. @@ -277,13 +280,20 @@ mark_cache_size: 5368709120 # in query or server memory usage - because this memory can be discarded similar to OS page cache. # The cache is dropped (the files are closed) automatically on removal of old parts in MergeTree, # also it can be dropped manually by the SYSTEM DROP MMAP CACHE query. -mmap_cache_size: 1000 +# mmap_cache_size: 1024 # Cache size in bytes for compiled expressions. -compiled_expression_cache_size: 134217728 +# compiled_expression_cache_size: 134217728 # Cache size in elements for compiled expressions. -compiled_expression_cache_elements_size: 10000 +# compiled_expression_cache_elements_size: 10000 + +# Configuration for the query cache +# query_cache: +# max_size_in_bytes: 1073741824 +# max_entries: 1024 +# max_entry_size_in_bytes: 1048576 +# max_entry_size_in_rows: 30000000 # Path to data directory, with trailing slash. path: /var/lib/clickhouse/ @@ -726,6 +736,13 @@ metric_log: flush_interval_milliseconds: 7500 collect_interval_milliseconds: 1000 +# Error log contains rows with current values of errors collected with "collect_interval_milliseconds" interval. +error_log: + database: system + table: error_log + flush_interval_milliseconds: 7500 + collect_interval_milliseconds: 1000 + # Asynchronous metric log contains values of metrics from # system.asynchronous_metrics. asynchronous_metric_log: diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index c3bb42160ad..353358fac65 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -261,7 +261,24 @@ AccessControl::AccessControl() } -AccessControl::~AccessControl() = default; +AccessControl::~AccessControl() +{ + try + { + AccessControl::shutdown(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + + +void AccessControl::shutdown() +{ + MultipleAccessStorage::shutdown(); + removeAllStorages(); +} void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, diff --git a/src/Access/AccessControl.h b/src/Access/AccessControl.h index d1537219a06..bfaf256ad48 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -53,6 +53,9 @@ public: AccessControl(); ~AccessControl() override; + /// Shutdown the access control and stops all background activity. + void shutdown() override; + /// Initializes access storage (user directories). 
void setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, const zkutil::GetZooKeeper & get_zookeeper_function_); diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index bf1fe3feec3..f8df56516ec 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -108,6 +108,9 @@ bool Authentication::areCredentialsValid( case AuthenticationType::HTTP: throw Authentication::Require("ClickHouse Basic Authentication"); + case AuthenticationType::JWT: + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud"); + case AuthenticationType::KERBEROS: return external_authenticators.checkKerberosCredentials(auth_data.getKerberosRealm(), *gss_acceptor_context); @@ -149,6 +152,9 @@ bool Authentication::areCredentialsValid( case AuthenticationType::SSL_CERTIFICATE: throw Authentication::Require("ClickHouse X.509 Authentication"); + case AuthenticationType::JWT: + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud"); + case AuthenticationType::SSH_KEY: #if USE_SSH throw Authentication::Require("SSH Keys Authentication"); @@ -193,6 +199,9 @@ bool Authentication::areCredentialsValid( throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); #endif + case AuthenticationType::JWT: + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud"); + case AuthenticationType::BCRYPT_PASSWORD: return checkPasswordBcrypt(basic_credentials->getPassword(), auth_data.getPasswordHashBinary()); @@ -222,6 +231,9 @@ bool Authentication::areCredentialsValid( case AuthenticationType::HTTP: throw Authentication::Require("ClickHouse Basic Authentication"); + case AuthenticationType::JWT: + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud"); + case AuthenticationType::KERBEROS: throw Authentication::Require(auth_data.getKerberosRealm()); @@ -254,6 +266,9 @@ bool Authentication::areCredentialsValid( case AuthenticationType::HTTP: throw Authentication::Require("ClickHouse Basic Authentication"); + case AuthenticationType::JWT: + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud"); + case AuthenticationType::KERBEROS: throw Authentication::Require(auth_data.getKerberosRealm()); diff --git a/src/Access/AuthenticationData.cpp b/src/Access/AuthenticationData.cpp index 70355fadfbd..e9bc111e18a 100644 --- a/src/Access/AuthenticationData.cpp +++ b/src/Access/AuthenticationData.cpp @@ -135,6 +135,7 @@ void AuthenticationData::setPassword(const String & password_) case AuthenticationType::BCRYPT_PASSWORD: case AuthenticationType::NO_PASSWORD: case AuthenticationType::LDAP: + case AuthenticationType::JWT: case AuthenticationType::KERBEROS: case AuthenticationType::SSL_CERTIFICATE: case AuthenticationType::SSH_KEY: @@ -251,6 +252,7 @@ void AuthenticationData::setPasswordHashBinary(const Digest & hash) case AuthenticationType::NO_PASSWORD: case AuthenticationType::LDAP: + case AuthenticationType::JWT: case AuthenticationType::KERBEROS: case AuthenticationType::SSL_CERTIFICATE: case AuthenticationType::SSH_KEY: @@ -322,6 +324,10 @@ std::shared_ptr AuthenticationData::toAST() const node->children.push_back(std::make_shared(getLDAPServerName())); break; } + case AuthenticationType::JWT: + { + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud"); + } case 
AuthenticationType::KERBEROS: { const auto & realm = getKerberosRealm(); diff --git a/src/Access/CachedAccessChecking.cpp b/src/Access/CachedAccessChecking.cpp index aa8ef6073d3..0d629e7b77a 100644 --- a/src/Access/CachedAccessChecking.cpp +++ b/src/Access/CachedAccessChecking.cpp @@ -4,12 +4,12 @@ namespace DB { -CachedAccessChecking::CachedAccessChecking(const std::shared_ptr & access_, AccessFlags access_flags_) +CachedAccessChecking::CachedAccessChecking(const std::shared_ptr & access_, AccessFlags access_flags_) : CachedAccessChecking(access_, AccessRightsElement{access_flags_}) { } -CachedAccessChecking::CachedAccessChecking(const std::shared_ptr & access_, const AccessRightsElement & element_) +CachedAccessChecking::CachedAccessChecking(const std::shared_ptr & access_, const AccessRightsElement & element_) : access(access_), element(element_) { } diff --git a/src/Access/CachedAccessChecking.h b/src/Access/CachedAccessChecking.h index e87c28dd823..aaeea6ceddc 100644 --- a/src/Access/CachedAccessChecking.h +++ b/src/Access/CachedAccessChecking.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -13,14 +14,14 @@ class ContextAccess; class CachedAccessChecking { public: - CachedAccessChecking(const std::shared_ptr & access_, AccessFlags access_flags_); - CachedAccessChecking(const std::shared_ptr & access_, const AccessRightsElement & element_); + CachedAccessChecking(const std::shared_ptr & access_, AccessFlags access_flags_); + CachedAccessChecking(const std::shared_ptr & access_, const AccessRightsElement & element_); ~CachedAccessChecking(); bool checkAccess(bool throw_if_denied = true); private: - const std::shared_ptr access; + const std::shared_ptr access; const AccessRightsElement element; bool checked = false; bool result = false; diff --git a/src/Access/Common/AuthenticationType.cpp b/src/Access/Common/AuthenticationType.cpp index 2cc126ad9b7..427765b8a79 100644 --- a/src/Access/Common/AuthenticationType.cpp +++ b/src/Access/Common/AuthenticationType.cpp @@ -72,6 +72,11 @@ const AuthenticationTypeInfo & AuthenticationTypeInfo::get(AuthenticationType ty static const auto info = make_info(Keyword::HTTP); return info; } + case AuthenticationType::JWT: + { + static const auto info = make_info(Keyword::JWT); + return info; + } case AuthenticationType::MAX: break; } diff --git a/src/Access/Common/AuthenticationType.h b/src/Access/Common/AuthenticationType.h index a68549aff4c..16f4388bbff 100644 --- a/src/Access/Common/AuthenticationType.h +++ b/src/Access/Common/AuthenticationType.h @@ -41,6 +41,9 @@ enum class AuthenticationType : uint8_t /// Authentication through HTTP protocol HTTP, + /// JSON Web Token + JWT, + MAX, }; diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 28a825de6cf..a2807ecc5ea 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -20,6 +20,7 @@ #include #include #include +#include namespace DB @@ -271,7 +272,7 @@ namespace std::shared_ptr ContextAccess::fromContext(const ContextPtr & context) { - return context->getAccess(); + return ContextAccessWrapper::fromContext(context)->getAccess(); } @@ -560,7 +561,7 @@ std::shared_ptr ContextAccess::getAccessRightsWithImplicit() template -bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... args) const +bool ContextAccess::checkAccessImplHelper(const ContextPtr & context, AccessFlags flags, const Args &... 
args) const { if (user_was_dropped) { @@ -573,8 +574,10 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg if (params.full_access) return true; - auto access_granted = [] + auto access_granted = [&] { + if constexpr (throw_if_denied) + context->addQueryPrivilegesInfo(AccessRightsElement{flags, args...}.toStringWithoutOptions(), true); return true; }; @@ -583,7 +586,10 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg FmtArgs && ...fmt_args [[maybe_unused]]) { if constexpr (throw_if_denied) + { + context->addQueryPrivilegesInfo(AccessRightsElement{flags, args...}.toStringWithoutOptions(), false); throw Exception(error_code, std::move(fmt_string), getUserName(), std::forward(fmt_args)...); + } return false; }; @@ -686,102 +692,102 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg } template -bool ContextAccess::checkAccessImpl(const AccessFlags & flags) const +bool ContextAccess::checkAccessImpl(const ContextPtr & context, const AccessFlags & flags) const { - return checkAccessImplHelper(flags); + return checkAccessImplHelper(context, flags); } template -bool ContextAccess::checkAccessImpl(const AccessFlags & flags, std::string_view database, const Args &... args) const +bool ContextAccess::checkAccessImpl(const ContextPtr & context, const AccessFlags & flags, std::string_view database, const Args &... args) const { - return checkAccessImplHelper(flags, database.empty() ? params.current_database : database, args...); + return checkAccessImplHelper(context, flags, database.empty() ? params.current_database : database, args...); } template -bool ContextAccess::checkAccessImplHelper(const AccessRightsElement & element) const +bool ContextAccess::checkAccessImplHelper(const ContextPtr & context, const AccessRightsElement & element) const { assert(!element.grant_option || grant_option); if (element.isGlobalWithParameter()) { if (element.any_parameter) - return checkAccessImpl(element.access_flags); + return checkAccessImpl(context, element.access_flags); else - return checkAccessImpl(element.access_flags, element.parameter); + return checkAccessImpl(context, element.access_flags, element.parameter); } else if (element.any_database) - return checkAccessImpl(element.access_flags); + return checkAccessImpl(context, element.access_flags); else if (element.any_table) - return checkAccessImpl(element.access_flags, element.database); + return checkAccessImpl(context, element.access_flags, element.database); else if (element.any_column) - return checkAccessImpl(element.access_flags, element.database, element.table); + return checkAccessImpl(context, element.access_flags, element.database, element.table); else - return checkAccessImpl(element.access_flags, element.database, element.table, element.columns); + return checkAccessImpl(context, element.access_flags, element.database, element.table, element.columns); } template -bool ContextAccess::checkAccessImpl(const AccessRightsElement & element) const +bool ContextAccess::checkAccessImpl(const ContextPtr & context, const AccessRightsElement & element) const { if constexpr (grant_option) { - return checkAccessImplHelper(element); + return checkAccessImplHelper(context, element); } else { if (element.grant_option) - return checkAccessImplHelper(element); + return checkAccessImplHelper(context, element); else - return checkAccessImplHelper(element); + return checkAccessImplHelper(context, element); } } template -bool ContextAccess::checkAccessImpl(const 
AccessRightsElements & elements) const +bool ContextAccess::checkAccessImpl(const ContextPtr & context, const AccessRightsElements & elements) const { for (const auto & element : elements) - if (!checkAccessImpl(element)) + if (!checkAccessImpl(context, element)) return false; return true; } -bool ContextAccess::isGranted(const AccessFlags & flags) const { return checkAccessImpl(flags); } -bool ContextAccess::isGranted(const AccessFlags & flags, std::string_view database) const { return checkAccessImpl(flags, database); } -bool ContextAccess::isGranted(const AccessFlags & flags, std::string_view database, std::string_view table) const { return checkAccessImpl(flags, database, table); } -bool ContextAccess::isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { return checkAccessImpl(flags, database, table, column); } -bool ContextAccess::isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { return checkAccessImpl(flags, database, table, columns); } -bool ContextAccess::isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { return checkAccessImpl(flags, database, table, columns); } -bool ContextAccess::isGranted(const AccessRightsElement & element) const { return checkAccessImpl(element); } -bool ContextAccess::isGranted(const AccessRightsElements & elements) const { return checkAccessImpl(elements); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessFlags & flags) const { return checkAccessImpl(context, flags); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const { return checkAccessImpl(context, flags, database); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const { return checkAccessImpl(context, flags, database, table); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { return checkAccessImpl(context, flags, database, table, column); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { return checkAccessImpl(context, flags, database, table, columns); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { return checkAccessImpl(context, flags, database, table, columns); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessRightsElement & element) const { return checkAccessImpl(context, element); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessRightsElements & elements) const { return checkAccessImpl(context, elements); } -bool ContextAccess::hasGrantOption(const AccessFlags & flags) const { return checkAccessImpl(flags); } -bool ContextAccess::hasGrantOption(const AccessFlags & flags, std::string_view database) const { return checkAccessImpl(flags, database); } -bool ContextAccess::hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table) const { return checkAccessImpl(flags, database, table); } -bool ContextAccess::hasGrantOption(const AccessFlags & flags, std::string_view 
database, std::string_view table, std::string_view column) const { return checkAccessImpl(flags, database, table, column); } -bool ContextAccess::hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { return checkAccessImpl(flags, database, table, columns); } -bool ContextAccess::hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { return checkAccessImpl(flags, database, table, columns); } -bool ContextAccess::hasGrantOption(const AccessRightsElement & element) const { return checkAccessImpl(element); } -bool ContextAccess::hasGrantOption(const AccessRightsElements & elements) const { return checkAccessImpl(elements); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessFlags & flags) const { return checkAccessImpl(context, flags); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const { return checkAccessImpl(context, flags, database); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const { return checkAccessImpl(context, flags, database, table); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { return checkAccessImpl(context, flags, database, table, column); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { return checkAccessImpl(context, flags, database, table, columns); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { return checkAccessImpl(context, flags, database, table, columns); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessRightsElement & element) const { return checkAccessImpl(context, element); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessRightsElements & elements) const { return checkAccessImpl(context, elements); } -void ContextAccess::checkAccess(const AccessFlags & flags) const { checkAccessImpl(flags); } -void ContextAccess::checkAccess(const AccessFlags & flags, std::string_view database) const { checkAccessImpl(flags, database); } -void ContextAccess::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table) const { checkAccessImpl(flags, database, table); } -void ContextAccess::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { checkAccessImpl(flags, database, table, column); } -void ContextAccess::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { checkAccessImpl(flags, database, table, columns); } -void ContextAccess::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { checkAccessImpl(flags, database, table, columns); } -void ContextAccess::checkAccess(const AccessRightsElement & element) const { checkAccessImpl(element); } -void ContextAccess::checkAccess(const AccessRightsElements & elements) const { checkAccessImpl(elements); } +void 
ContextAccess::checkAccess(const ContextPtr & context, const AccessFlags & flags) const { checkAccessImpl(context, flags); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const { checkAccessImpl(context, flags, database); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const { checkAccessImpl(context, flags, database, table); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { checkAccessImpl(context, flags, database, table, column); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { checkAccessImpl(context, flags, database, table, columns); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { checkAccessImpl(context, flags, database, table, columns); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessRightsElement & element) const { checkAccessImpl(context, element); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessRightsElements & elements) const { checkAccessImpl(context, elements); } -void ContextAccess::checkGrantOption(const AccessFlags & flags) const { checkAccessImpl(flags); } -void ContextAccess::checkGrantOption(const AccessFlags & flags, std::string_view database) const { checkAccessImpl(flags, database); } -void ContextAccess::checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table) const { checkAccessImpl(flags, database, table); } -void ContextAccess::checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { checkAccessImpl(flags, database, table, column); } -void ContextAccess::checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { checkAccessImpl(flags, database, table, columns); } -void ContextAccess::checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { checkAccessImpl(flags, database, table, columns); } -void ContextAccess::checkGrantOption(const AccessRightsElement & element) const { checkAccessImpl(element); } -void ContextAccess::checkGrantOption(const AccessRightsElements & elements) const { checkAccessImpl(elements); } +void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessFlags & flags) const { checkAccessImpl(context, flags); } +void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const { checkAccessImpl(context, flags, database); } +void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const { checkAccessImpl(context, flags, database, table); } +void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { checkAccessImpl(context, flags, database, table, column); } +void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessFlags & 
flags, std::string_view database, std::string_view table, const std::vector & columns) const { checkAccessImpl(context, flags, database, table, columns); } +void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { checkAccessImpl(context, flags, database, table, columns); } +void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessRightsElement & element) const { checkAccessImpl(context, element); } +void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessRightsElements & elements) const { checkAccessImpl(context, elements); } template -bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const GetNameFunction & get_name_function) const +bool ContextAccess::checkAdminOptionImplHelper(const ContextPtr & context, const Container & role_ids, const GetNameFunction & get_name_function) const { auto show_error = [](int error_code [[maybe_unused]], FormatStringHelper fmt_string [[maybe_unused]], @@ -804,7 +810,7 @@ bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const if (!std::size(role_ids)) return true; - if (isGranted(AccessType::ROLE_ADMIN)) + if (isGranted(context, AccessType::ROLE_ADMIN)) return true; auto info = getRolesInfo(); @@ -840,54 +846,54 @@ bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const } template -bool ContextAccess::checkAdminOptionImpl(const UUID & role_id) const +bool ContextAccess::checkAdminOptionImpl(const ContextPtr & context, const UUID & role_id) const { - return checkAdminOptionImplHelper(to_array(role_id), [this](const UUID & id, size_t) { return access_control->tryReadName(id); }); + return checkAdminOptionImplHelper(context, to_array(role_id), [this](const UUID & id, size_t) { return access_control->tryReadName(id); }); } template -bool ContextAccess::checkAdminOptionImpl(const UUID & role_id, const String & role_name) const +bool ContextAccess::checkAdminOptionImpl(const ContextPtr & context, const UUID & role_id, const String & role_name) const { - return checkAdminOptionImplHelper(to_array(role_id), [&role_name](const UUID &, size_t) { return std::optional{role_name}; }); + return checkAdminOptionImplHelper(context, to_array(role_id), [&role_name](const UUID &, size_t) { return std::optional{role_name}; }); } template -bool ContextAccess::checkAdminOptionImpl(const UUID & role_id, const std::unordered_map & names_of_roles) const +bool ContextAccess::checkAdminOptionImpl(const ContextPtr & context, const UUID & role_id, const std::unordered_map & names_of_roles) const { - return checkAdminOptionImplHelper(to_array(role_id), [&names_of_roles](const UUID & id, size_t) { auto it = names_of_roles.find(id); return (it != names_of_roles.end()) ? it->second : std::optional{}; }); + return checkAdminOptionImplHelper(context, to_array(role_id), [&names_of_roles](const UUID & id, size_t) { auto it = names_of_roles.find(id); return (it != names_of_roles.end()) ? 
it->second : std::optional{}; }); } template -bool ContextAccess::checkAdminOptionImpl(const std::vector & role_ids) const +bool ContextAccess::checkAdminOptionImpl(const ContextPtr & context, const std::vector & role_ids) const { - return checkAdminOptionImplHelper(role_ids, [this](const UUID & id, size_t) { return access_control->tryReadName(id); }); + return checkAdminOptionImplHelper(context, role_ids, [this](const UUID & id, size_t) { return access_control->tryReadName(id); }); } template -bool ContextAccess::checkAdminOptionImpl(const std::vector & role_ids, const Strings & names_of_roles) const +bool ContextAccess::checkAdminOptionImpl(const ContextPtr & context, const std::vector & role_ids, const Strings & names_of_roles) const { - return checkAdminOptionImplHelper(role_ids, [&names_of_roles](const UUID &, size_t i) { return std::optional{names_of_roles[i]}; }); + return checkAdminOptionImplHelper(context, role_ids, [&names_of_roles](const UUID &, size_t i) { return std::optional{names_of_roles[i]}; }); } template -bool ContextAccess::checkAdminOptionImpl(const std::vector & role_ids, const std::unordered_map & names_of_roles) const +bool ContextAccess::checkAdminOptionImpl(const ContextPtr & context, const std::vector & role_ids, const std::unordered_map & names_of_roles) const { - return checkAdminOptionImplHelper(role_ids, [&names_of_roles](const UUID & id, size_t) { auto it = names_of_roles.find(id); return (it != names_of_roles.end()) ? it->second : std::optional{}; }); + return checkAdminOptionImplHelper(context, role_ids, [&names_of_roles](const UUID & id, size_t) { auto it = names_of_roles.find(id); return (it != names_of_roles.end()) ? it->second : std::optional{}; }); } -bool ContextAccess::hasAdminOption(const UUID & role_id) const { return checkAdminOptionImpl(role_id); } -bool ContextAccess::hasAdminOption(const UUID & role_id, const String & role_name) const { return checkAdminOptionImpl(role_id, role_name); } -bool ContextAccess::hasAdminOption(const UUID & role_id, const std::unordered_map & names_of_roles) const { return checkAdminOptionImpl(role_id, names_of_roles); } -bool ContextAccess::hasAdminOption(const std::vector & role_ids) const { return checkAdminOptionImpl(role_ids); } -bool ContextAccess::hasAdminOption(const std::vector & role_ids, const Strings & names_of_roles) const { return checkAdminOptionImpl(role_ids, names_of_roles); } -bool ContextAccess::hasAdminOption(const std::vector & role_ids, const std::unordered_map & names_of_roles) const { return checkAdminOptionImpl(role_ids, names_of_roles); } +bool ContextAccess::hasAdminOption(const ContextPtr & context, const UUID & role_id) const { return checkAdminOptionImpl(context, role_id); } +bool ContextAccess::hasAdminOption(const ContextPtr & context, const UUID & role_id, const String & role_name) const { return checkAdminOptionImpl(context, role_id, role_name); } +bool ContextAccess::hasAdminOption(const ContextPtr & context, const UUID & role_id, const std::unordered_map & names_of_roles) const { return checkAdminOptionImpl(context, role_id, names_of_roles); } +bool ContextAccess::hasAdminOption(const ContextPtr & context, const std::vector & role_ids) const { return checkAdminOptionImpl(context, role_ids); } +bool ContextAccess::hasAdminOption(const ContextPtr & context, const std::vector & role_ids, const Strings & names_of_roles) const { return checkAdminOptionImpl(context, role_ids, names_of_roles); } +bool ContextAccess::hasAdminOption(const ContextPtr & context, const std::vector & role_ids, 
const std::unordered_map & names_of_roles) const { return checkAdminOptionImpl(context, role_ids, names_of_roles); } -void ContextAccess::checkAdminOption(const UUID & role_id) const { checkAdminOptionImpl(role_id); } -void ContextAccess::checkAdminOption(const UUID & role_id, const String & role_name) const { checkAdminOptionImpl(role_id, role_name); } -void ContextAccess::checkAdminOption(const UUID & role_id, const std::unordered_map & names_of_roles) const { checkAdminOptionImpl(role_id, names_of_roles); } -void ContextAccess::checkAdminOption(const std::vector & role_ids) const { checkAdminOptionImpl(role_ids); } -void ContextAccess::checkAdminOption(const std::vector & role_ids, const Strings & names_of_roles) const { checkAdminOptionImpl(role_ids, names_of_roles); } -void ContextAccess::checkAdminOption(const std::vector & role_ids, const std::unordered_map & names_of_roles) const { checkAdminOptionImpl(role_ids, names_of_roles); } +void ContextAccess::checkAdminOption(const ContextPtr & context, const UUID & role_id) const { checkAdminOptionImpl(context, role_id); } +void ContextAccess::checkAdminOption(const ContextPtr & context, const UUID & role_id, const String & role_name) const { checkAdminOptionImpl(context, role_id, role_name); } +void ContextAccess::checkAdminOption(const ContextPtr & context, const UUID & role_id, const std::unordered_map & names_of_roles) const { checkAdminOptionImpl(context, role_id, names_of_roles); } +void ContextAccess::checkAdminOption(const ContextPtr & context, const std::vector & role_ids) const { checkAdminOptionImpl(context, role_ids); } +void ContextAccess::checkAdminOption(const ContextPtr & context, const std::vector & role_ids, const Strings & names_of_roles) const { checkAdminOptionImpl(context, role_ids, names_of_roles); } +void ContextAccess::checkAdminOption(const ContextPtr & context, const std::vector & role_ids, const std::unordered_map & names_of_roles) const { checkAdminOptionImpl(context, role_ids, names_of_roles); } void ContextAccess::checkGranteeIsAllowed(const UUID & grantee_id, const IAccessEntity & grantee) const @@ -919,4 +925,10 @@ void ContextAccess::checkGranteesAreAllowed(const std::vector & grantee_id } } +std::shared_ptr ContextAccessWrapper::fromContext(const ContextPtr & context) +{ + return context->getAccess(); +} + + } diff --git a/src/Access/ContextAccess.h b/src/Access/ContextAccess.h index 237c423d261..465932af1d3 100644 --- a/src/Access/ContextAccess.h +++ b/src/Access/ContextAccess.h @@ -4,9 +4,12 @@ #include #include #include +#include +#include #include #include #include +#include #include #include #include @@ -71,59 +74,59 @@ public: /// Checks if a specified access is granted, and throws an exception if not. /// Empty database means the current database. 
- void checkAccess(const AccessFlags & flags) const; - void checkAccess(const AccessFlags & flags, std::string_view database) const; - void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table) const; - void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const; - void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const; - void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const; - void checkAccess(const AccessRightsElement & element) const; - void checkAccess(const AccessRightsElements & elements) const; + void checkAccess(const ContextPtr & context, const AccessFlags & flags) const; + void checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const; + void checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const; + void checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const; + void checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const; + void checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const; + void checkAccess(const ContextPtr & context, const AccessRightsElement & element) const; + void checkAccess(const ContextPtr & context, const AccessRightsElements & elements) const; - void checkGrantOption(const AccessFlags & flags) const; - void checkGrantOption(const AccessFlags & flags, std::string_view database) const; - void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table) const; - void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const; - void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const; - void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const; - void checkGrantOption(const AccessRightsElement & element) const; - void checkGrantOption(const AccessRightsElements & elements) const; + void checkGrantOption(const ContextPtr & context, const AccessFlags & flags) const; + void checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const; + void checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const; + void checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const; + void checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const; + void checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const; + void checkGrantOption(const ContextPtr & context, const AccessRightsElement & element) const; + void checkGrantOption(const ContextPtr & context, const AccessRightsElements & elements) const; 
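/// A minimal sketch of what this signature change means at a call site, assuming a
/// ContextPtr named `query_context` and a ContextAccessPtr named `access` are in scope
/// (both names are illustrative and not taken from this diff):
///
///     // before: the access object is queried directly
///     access->checkAccess(AccessType::SELECT, database_name, table_name);
///
///     // after: the query context is passed explicitly so that the checked privilege
///     // can be recorded on it (see addQueryPrivilegesInfo above); alternatively the
///     // ContextAccessWrapper introduced below stores the context and forwards it
///     access->checkAccess(query_context, AccessType::SELECT, database_name, table_name);
///     ContextAccessWrapper::fromContext(query_context)->checkAccess(AccessType::SELECT, database_name, table_name);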
/// Checks if a specified access is granted, and returns false if not. /// Empty database means the current database. - bool isGranted(const AccessFlags & flags) const; - bool isGranted(const AccessFlags & flags, std::string_view database) const; - bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table) const; - bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const; - bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const; - bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const; - bool isGranted(const AccessRightsElement & element) const; - bool isGranted(const AccessRightsElements & elements) const; + bool isGranted(const ContextPtr & context, const AccessFlags & flags) const; + bool isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const; + bool isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const; + bool isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const; + bool isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const; + bool isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const; + bool isGranted(const ContextPtr & context, const AccessRightsElement & element) const; + bool isGranted(const ContextPtr & context, const AccessRightsElements & elements) const; - bool hasGrantOption(const AccessFlags & flags) const; - bool hasGrantOption(const AccessFlags & flags, std::string_view database) const; - bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table) const; - bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const; - bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const; - bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const; - bool hasGrantOption(const AccessRightsElement & element) const; - bool hasGrantOption(const AccessRightsElements & elements) const; + bool hasGrantOption(const ContextPtr & context, const AccessFlags & flags) const; + bool hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const; + bool hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const; + bool hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const; + bool hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const; + bool hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const; + bool hasGrantOption(const ContextPtr & context, const AccessRightsElement & element) const; + bool hasGrantOption(const ContextPtr & 
context, const AccessRightsElements & elements) const; /// Checks if a specified role is granted with admin option, and throws an exception if not. - void checkAdminOption(const UUID & role_id) const; - void checkAdminOption(const UUID & role_id, const String & role_name) const; - void checkAdminOption(const UUID & role_id, const std::unordered_map & names_of_roles) const; - void checkAdminOption(const std::vector & role_ids) const; - void checkAdminOption(const std::vector & role_ids, const Strings & names_of_roles) const; - void checkAdminOption(const std::vector & role_ids, const std::unordered_map & names_of_roles) const; + void checkAdminOption(const ContextPtr & context, const UUID & role_id) const; + void checkAdminOption(const ContextPtr & context, const UUID & role_id, const String & role_name) const; + void checkAdminOption(const ContextPtr & context, const UUID & role_id, const std::unordered_map & names_of_roles) const; + void checkAdminOption(const ContextPtr & context, const std::vector & role_ids) const; + void checkAdminOption(const ContextPtr & context, const std::vector & role_ids, const Strings & names_of_roles) const; + void checkAdminOption(const ContextPtr & context, const std::vector & role_ids, const std::unordered_map & names_of_roles) const; /// Checks if a specified role is granted with admin option, and returns false if not. - bool hasAdminOption(const UUID & role_id) const; - bool hasAdminOption(const UUID & role_id, const String & role_name) const; - bool hasAdminOption(const UUID & role_id, const std::unordered_map & names_of_roles) const; - bool hasAdminOption(const std::vector & role_ids) const; - bool hasAdminOption(const std::vector & role_ids, const Strings & names_of_roles) const; - bool hasAdminOption(const std::vector & role_ids, const std::unordered_map & names_of_roles) const; + bool hasAdminOption(const ContextPtr & context, const UUID & role_id) const; + bool hasAdminOption(const ContextPtr & context, const UUID & role_id, const String & role_name) const; + bool hasAdminOption(const ContextPtr & context, const UUID & role_id, const std::unordered_map & names_of_roles) const; + bool hasAdminOption(const ContextPtr & context, const std::vector & role_ids) const; + bool hasAdminOption(const ContextPtr & context, const std::vector & role_ids, const Strings & names_of_roles) const; + bool hasAdminOption(const ContextPtr & context, const std::vector & role_ids, const std::unordered_map & names_of_roles) const; /// Checks if a grantee is allowed for the current user, throws an exception if not. void checkGranteeIsAllowed(const UUID & grantee_id, const IAccessEntity & grantee) const; @@ -142,43 +145,43 @@ private: void calculateAccessRights() const TSA_REQUIRES(mutex); template - bool checkAccessImpl(const AccessFlags & flags) const; + bool checkAccessImpl(const ContextPtr & context, const AccessFlags & flags) const; template - bool checkAccessImpl(const AccessFlags & flags, std::string_view database, const Args &... args) const; + bool checkAccessImpl(const ContextPtr & context, const AccessFlags & flags, std::string_view database, const Args &... 
args) const; template - bool checkAccessImpl(const AccessRightsElement & element) const; + bool checkAccessImpl(const ContextPtr & context, const AccessRightsElement & element) const; template - bool checkAccessImpl(const AccessRightsElements & elements) const; + bool checkAccessImpl(const ContextPtr & context, const AccessRightsElements & elements) const; template - bool checkAccessImplHelper(AccessFlags flags, const Args &... args) const; + bool checkAccessImplHelper(const ContextPtr & context, AccessFlags flags, const Args &... args) const; template - bool checkAccessImplHelper(const AccessRightsElement & element) const; + bool checkAccessImplHelper(const ContextPtr & context, const AccessRightsElement & element) const; template - bool checkAdminOptionImpl(const UUID & role_id) const; + bool checkAdminOptionImpl(const ContextPtr & context, const UUID & role_id) const; template - bool checkAdminOptionImpl(const UUID & role_id, const String & role_name) const; + bool checkAdminOptionImpl(const ContextPtr & context, const UUID & role_id, const String & role_name) const; template - bool checkAdminOptionImpl(const UUID & role_id, const std::unordered_map & names_of_roles) const; + bool checkAdminOptionImpl(const ContextPtr & context, const UUID & role_id, const std::unordered_map & names_of_roles) const; template - bool checkAdminOptionImpl(const std::vector & role_ids) const; + bool checkAdminOptionImpl(const ContextPtr & context, const std::vector & role_ids) const; template - bool checkAdminOptionImpl(const std::vector & role_ids, const Strings & names_of_roles) const; + bool checkAdminOptionImpl(const ContextPtr & context, const std::vector & role_ids, const Strings & names_of_roles) const; template - bool checkAdminOptionImpl(const std::vector & role_ids, const std::unordered_map & names_of_roles) const; + bool checkAdminOptionImpl(const ContextPtr & context, const std::vector & role_ids, const std::unordered_map & names_of_roles) const; template - bool checkAdminOptionImplHelper(const Container & role_ids, const GetNameFunction & get_name_function) const; + bool checkAdminOptionImplHelper(const ContextPtr & context, const Container & role_ids, const GetNameFunction & get_name_function) const; const AccessControl * access_control = nullptr; const Params params; @@ -203,4 +206,115 @@ private: mutable std::shared_ptr enabled_settings TSA_GUARDED_BY(mutex); }; +/// This wrapper was added to be able to pass the current context to the access +/// without the need to change the signature and all calls to the ContextAccess itself. +/// Right now a context is used to store privileges that are checked for a query, +/// and might be useful for something else in the future as well. +class ContextAccessWrapper : public std::enable_shared_from_this +{ +public: + using ContextAccessPtr = std::shared_ptr; + + ContextAccessWrapper(const ContextAccessPtr & access_, const ContextPtr & context_): access(access_), context(context_) {} + ~ContextAccessWrapper() = default; + + static std::shared_ptr fromContext(const ContextPtr & context); + + const ContextAccess::Params & getParams() const { return access->getParams(); } + + const ContextAccessPtr & getAccess() const { return access; } + + /// Returns the current user. Throws if user is nullptr. + ALWAYS_INLINE UserPtr getUser() const { return access->getUser(); } + /// Same as above, but can return nullptr. 
+ ALWAYS_INLINE UserPtr tryGetUser() const { return access->tryGetUser(); } + ALWAYS_INLINE String getUserName() const { return access->getUserName(); } + ALWAYS_INLINE std::optional getUserID() const { return access->getUserID(); } + + /// Returns information about current and enabled roles. + ALWAYS_INLINE std::shared_ptr getRolesInfo() const { return access->getRolesInfo(); } + + /// Returns the row policy filter for a specified table. + /// The function returns nullptr if there is no filter to apply. + ALWAYS_INLINE RowPolicyFilterPtr getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const { return access->getRowPolicyFilter(database, table_name, filter_type); } + + /// Returns the quota to track resource consumption. + ALWAYS_INLINE std::shared_ptr getQuota() const { return access->getQuota(); } + ALWAYS_INLINE std::optional getQuotaUsage() const { return access->getQuotaUsage(); } + + /// Returns the default settings, i.e. the settings which should be applied on user's login. + ALWAYS_INLINE SettingsChanges getDefaultSettings() const { return access->getDefaultSettings(); } + ALWAYS_INLINE std::shared_ptr getDefaultProfileInfo() const { return access->getDefaultProfileInfo(); } + + /// Returns the current access rights. + ALWAYS_INLINE std::shared_ptr getAccessRights() const { return access->getAccessRights(); } + ALWAYS_INLINE std::shared_ptr getAccessRightsWithImplicit() const { return access->getAccessRightsWithImplicit(); } + + /// Checks if a specified access is granted, and throws an exception if not. + /// Empty database means the current database. + ALWAYS_INLINE void checkAccess(const AccessFlags & flags) const { access->checkAccess(context, flags); } + ALWAYS_INLINE void checkAccess(const AccessFlags & flags, std::string_view database) const { access->checkAccess(context, flags, database); } + ALWAYS_INLINE void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table) const { access->checkAccess(context, flags, database, table); } + ALWAYS_INLINE void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { access->checkAccess(context, flags, database, table, column); } + ALWAYS_INLINE void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { access->checkAccess(context, flags, database, table, columns); } + ALWAYS_INLINE void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { access->checkAccess(context, flags, database, table, columns); } + ALWAYS_INLINE void checkAccess(const AccessRightsElement & element) const { access->checkAccess(context, element); } + ALWAYS_INLINE void checkAccess(const AccessRightsElements & elements) const { access->checkAccess(context, elements); } + + ALWAYS_INLINE void checkGrantOption(const AccessFlags & flags) const { access->checkGrantOption(context, flags); } + ALWAYS_INLINE void checkGrantOption(const AccessFlags & flags, std::string_view database) const { access->checkGrantOption(context, flags, database); } + ALWAYS_INLINE void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table) const { access->checkGrantOption(context, flags, database, table); } + ALWAYS_INLINE void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { 
access->checkGrantOption(context, flags, database, table, column); } + ALWAYS_INLINE void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { access->checkGrantOption(context, flags, database, table, columns); } + ALWAYS_INLINE void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { access->checkGrantOption(context, flags, database, table, columns); } + ALWAYS_INLINE void checkGrantOption(const AccessRightsElement & element) const { access->checkGrantOption(context, element); } + ALWAYS_INLINE void checkGrantOption(const AccessRightsElements & elements) const { access->checkGrantOption(context, elements); } + + /// Checks if a specified access is granted, and returns false if not. + /// Empty database means the current database. + ALWAYS_INLINE bool isGranted(const AccessFlags & flags) const { return access->isGranted(context, flags); } + ALWAYS_INLINE bool isGranted(const AccessFlags & flags, std::string_view database) const { return access->isGranted(context, flags, database); } + ALWAYS_INLINE bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table) const { return access->isGranted(context, flags, database, table); } + ALWAYS_INLINE bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { return access->isGranted(context, flags, database, table, column); } + ALWAYS_INLINE bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { return access->isGranted(context, flags, database, table, columns); } + ALWAYS_INLINE bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { return access->isGranted(context, flags, database, table, columns); } + ALWAYS_INLINE bool isGranted(const AccessRightsElement & element) const { return access->isGranted(context, element); } + ALWAYS_INLINE bool isGranted(const AccessRightsElements & elements) const { return access->isGranted(context, elements); } + + ALWAYS_INLINE bool hasGrantOption(const AccessFlags & flags) const { return access->hasGrantOption(context, flags); } + ALWAYS_INLINE bool hasGrantOption(const AccessFlags & flags, std::string_view database) const { return access->hasGrantOption(context, flags, database); } + ALWAYS_INLINE bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table) const { return access->hasGrantOption(context, flags, database, table); } + ALWAYS_INLINE bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { return access->hasGrantOption(context, flags, database, table, column); } + ALWAYS_INLINE bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { return access->hasGrantOption(context, flags, database, table, columns); } + ALWAYS_INLINE bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { return access->hasGrantOption(context, flags, database, table, columns); } + ALWAYS_INLINE bool hasGrantOption(const AccessRightsElement & element) const { return access->hasGrantOption(context, element); } + ALWAYS_INLINE bool hasGrantOption(const AccessRightsElements & 
elements) const { return access->hasGrantOption(context, elements); } + + /// Checks if a specified role is granted with admin option, and throws an exception if not. + ALWAYS_INLINE void checkAdminOption(const UUID & role_id) const { access->checkAdminOption(context, role_id); } + ALWAYS_INLINE void checkAdminOption(const UUID & role_id, const String & role_name) const { access->checkAdminOption(context, role_id, role_name); } + ALWAYS_INLINE void checkAdminOption(const UUID & role_id, const std::unordered_map & names_of_roles) const { access->checkAdminOption(context, role_id, names_of_roles); } + ALWAYS_INLINE void checkAdminOption(const std::vector & role_ids) const { access->checkAdminOption(context, role_ids); } + ALWAYS_INLINE void checkAdminOption(const std::vector & role_ids, const Strings & names_of_roles) const { access->checkAdminOption(context, role_ids, names_of_roles); } + ALWAYS_INLINE void checkAdminOption(const std::vector & role_ids, const std::unordered_map & names_of_roles) const { access->checkAdminOption(context, role_ids, names_of_roles); } + + /// Checks if a specified role is granted with admin option, and returns false if not. + ALWAYS_INLINE bool hasAdminOption(const UUID & role_id) const { return access->hasAdminOption(context, role_id); } + ALWAYS_INLINE bool hasAdminOption(const UUID & role_id, const String & role_name) const { return access->hasAdminOption(context, role_id, role_name); } + ALWAYS_INLINE bool hasAdminOption(const UUID & role_id, const std::unordered_map & names_of_roles) const { return access->hasAdminOption(context, role_id, names_of_roles); } + ALWAYS_INLINE bool hasAdminOption(const std::vector & role_ids) const { return access->hasAdminOption(context, role_ids); } + ALWAYS_INLINE bool hasAdminOption(const std::vector & role_ids, const Strings & names_of_roles) const { return access->hasAdminOption(context, role_ids, names_of_roles); } + ALWAYS_INLINE bool hasAdminOption(const std::vector & role_ids, const std::unordered_map & names_of_roles) const { return access->hasAdminOption(context, role_ids, names_of_roles); } + + /// Checks if a grantee is allowed for the current user, throws an exception if not. + ALWAYS_INLINE void checkGranteeIsAllowed(const UUID & grantee_id, const IAccessEntity & grantee) const { access->checkGranteeIsAllowed(grantee_id, grantee); } + /// Checks if grantees are allowed for the current user, throws an exception if not. + ALWAYS_INLINE void checkGranteesAreAllowed(const std::vector & grantee_ids) const { access->checkGranteesAreAllowed(grantee_ids); } + +private: + ContextAccessPtr access; + ContextPtr context; +}; + + } diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index fe698b32816..ee422f7d8ff 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -194,11 +194,9 @@ DiskAccessStorage::DiskAccessStorage(const String & storage_name_, const String DiskAccessStorage::~DiskAccessStorage() { - stopListsWritingThread(); - try { - writeLists(); + DiskAccessStorage::shutdown(); } catch (...) 
{ @@ -207,6 +205,17 @@ DiskAccessStorage::~DiskAccessStorage() } +void DiskAccessStorage::shutdown() +{ + stopListsWritingThread(); + + { + std::lock_guard lock{mutex}; + writeLists(); + } +} + + String DiskAccessStorage::getStorageParamsJSON() const { std::lock_guard lock{mutex}; diff --git a/src/Access/DiskAccessStorage.h b/src/Access/DiskAccessStorage.h index 5d94008b34f..38172b26970 100644 --- a/src/Access/DiskAccessStorage.h +++ b/src/Access/DiskAccessStorage.h @@ -18,6 +18,8 @@ public: DiskAccessStorage(const String & storage_name_, const String & directory_path_, AccessChangesNotifier & changes_notifier_, bool readonly_, bool allow_backup_); ~DiskAccessStorage() override; + void shutdown() override; + const char * getStorageType() const override { return STORAGE_TYPE; } String getStorageParamsJSON() const override; diff --git a/src/Access/IAccessStorage.h b/src/Access/IAccessStorage.h index 4f980bf9212..e88b1601f32 100644 --- a/src/Access/IAccessStorage.h +++ b/src/Access/IAccessStorage.h @@ -44,6 +44,11 @@ public: explicit IAccessStorage(const String & storage_name_) : storage_name(storage_name_) {} virtual ~IAccessStorage() = default; + /// If the AccessStorage has to do some complicated work when destroying - do it in advance. + /// For example, if the AccessStorage contains any threads for background work - ask them to complete and wait for completion. + /// By default, does nothing. + virtual void shutdown() {} + /// Returns the name of this storage. const String & getStorageName() const { return storage_name; } virtual const char * getStorageType() const = 0; diff --git a/src/Access/MultipleAccessStorage.cpp b/src/Access/MultipleAccessStorage.cpp index a8b508202b5..fda6601e4c6 100644 --- a/src/Access/MultipleAccessStorage.cpp +++ b/src/Access/MultipleAccessStorage.cpp @@ -34,11 +34,23 @@ MultipleAccessStorage::MultipleAccessStorage(const String & storage_name_) MultipleAccessStorage::~MultipleAccessStorage() { - /// It's better to remove the storages in the reverse order because they could depend on each other somehow. + try + { + MultipleAccessStorage::shutdown(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + +void MultipleAccessStorage::shutdown() +{ + /// It's better to shutdown the storages in the reverse order because they could depend on each other somehow. const auto storages = getStoragesPtr(); for (const auto & storage : *storages | boost::adaptors::reversed) { - removeStorage(storage); + storage->shutdown(); } } @@ -72,6 +84,16 @@ void MultipleAccessStorage::removeStorage(const StoragePtr & storage_to_remove) ids_cache.clear(); } +void MultipleAccessStorage::removeAllStorages() +{ + /// It's better to remove the storages in the reverse order because they could depend on each other somehow. 
+ const auto storages = getStoragesPtr(); + for (const auto & storage : *storages | boost::adaptors::reversed) + { + removeStorage(storage); + } +} + std::vector MultipleAccessStorage::getStorages() { return *getStoragesPtr(); diff --git a/src/Access/MultipleAccessStorage.h b/src/Access/MultipleAccessStorage.h index 005e6e2b9cd..e1543c59b67 100644 --- a/src/Access/MultipleAccessStorage.h +++ b/src/Access/MultipleAccessStorage.h @@ -21,6 +21,8 @@ public: explicit MultipleAccessStorage(const String & storage_name_ = STORAGE_TYPE); ~MultipleAccessStorage() override; + void shutdown() override; + const char * getStorageType() const override { return STORAGE_TYPE; } bool isReadOnly() const override; bool isReadOnly(const UUID & id) const override; @@ -32,6 +34,7 @@ public: void setStorages(const std::vector & storages); void addStorage(const StoragePtr & new_storage); void removeStorage(const StoragePtr & storage_to_remove); + void removeAllStorages(); std::vector getStorages(); std::vector getStorages() const; std::shared_ptr> getStoragesPtr(); diff --git a/src/Access/ReplicatedAccessStorage.cpp b/src/Access/ReplicatedAccessStorage.cpp index cd9a86a1bd2..ed114327041 100644 --- a/src/Access/ReplicatedAccessStorage.cpp +++ b/src/Access/ReplicatedAccessStorage.cpp @@ -66,6 +66,18 @@ ReplicatedAccessStorage::ReplicatedAccessStorage( } ReplicatedAccessStorage::~ReplicatedAccessStorage() +{ + try + { + ReplicatedAccessStorage::shutdown(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + +void ReplicatedAccessStorage::shutdown() { stopWatchingThread(); } diff --git a/src/Access/ReplicatedAccessStorage.h b/src/Access/ReplicatedAccessStorage.h index cddb20860f7..f8518226997 100644 --- a/src/Access/ReplicatedAccessStorage.h +++ b/src/Access/ReplicatedAccessStorage.h @@ -23,6 +23,8 @@ public: ReplicatedAccessStorage(const String & storage_name, const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper, AccessChangesNotifier & changes_notifier_, bool allow_backup); ~ReplicatedAccessStorage() override; + void shutdown() override; + const char * getStorageType() const override { return STORAGE_TYPE; } void startPeriodicReloading() override { startWatchingThread(); } diff --git a/src/Access/SettingsProfilesInfo.cpp b/src/Access/SettingsProfilesInfo.cpp index d8b52ecf5e4..a5eacbe1b6e 100644 --- a/src/Access/SettingsProfilesInfo.cpp +++ b/src/Access/SettingsProfilesInfo.cpp @@ -15,22 +15,8 @@ namespace ErrorCodes bool operator==(const SettingsProfilesInfo & lhs, const SettingsProfilesInfo & rhs) { - if (lhs.settings != rhs.settings) - return false; - - if (lhs.constraints != rhs.constraints) - return false; - - if (lhs.profiles != rhs.profiles) - return false; - - if (lhs.profiles_with_implicit != rhs.profiles_with_implicit) - return false; - - if (lhs.names_of_profiles != rhs.names_of_profiles) - return false; - - return true; + return std::tie(lhs.settings, lhs.constraints, lhs.profiles, lhs.profiles_with_implicit, lhs.names_of_profiles) + == std::tie(rhs.settings, rhs.constraints, rhs.profiles, rhs.profiles_with_implicit, rhs.names_of_profiles); } std::shared_ptr @@ -66,18 +52,20 @@ Strings SettingsProfilesInfo::getProfileNames() const { Strings result; result.reserve(profiles.size()); - for (const auto & profile_id : profiles) + for (const UUID & profile_uuid : profiles) { - const auto p = names_of_profiles.find(profile_id); - if (p != names_of_profiles.end()) - result.push_back(p->second); + const auto names_it = names_of_profiles.find(profile_uuid); + if (names_it != 
names_of_profiles.end()) + { + result.push_back(names_it->second); + } else { - if (const auto name = access_control.tryReadName(profile_id)) + if (const auto name = access_control.tryReadName(profile_uuid)) // We could've updated cache here, but it is a very rare case, so don't bother. result.push_back(*name); else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to get profile name for {}", toString(profile_id)); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to get profile name for {}", toString(profile_uuid)); } } diff --git a/src/Access/SettingsProfilesInfo.h b/src/Access/SettingsProfilesInfo.h index ec289a5ec0a..bc1b01f47d0 100644 --- a/src/Access/SettingsProfilesInfo.h +++ b/src/Access/SettingsProfilesInfo.h @@ -29,7 +29,11 @@ struct SettingsProfilesInfo /// Names of all the profiles in `profiles`. std::unordered_map names_of_profiles; - explicit SettingsProfilesInfo(const AccessControl & access_control_) : constraints(access_control_), access_control(access_control_) {} + explicit SettingsProfilesInfo(const AccessControl & access_control_) + : constraints(access_control_), access_control(access_control_) + { + } + std::shared_ptr getConstraintsAndProfileIDs( const std::shared_ptr & previous = nullptr) const; diff --git a/src/Access/User.cpp b/src/Access/User.cpp index 6a296706baf..c02c598ee40 100644 --- a/src/Access/User.cpp +++ b/src/Access/User.cpp @@ -33,6 +33,8 @@ void User::setName(const String & name_) throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_); if (name_.starts_with(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_); + if (name_.starts_with(EncodedUserInfo::JWT_AUTHENTICAION_MARKER)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_); name = name_; } diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 1f9a977bab6..5e36fe1ad84 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -880,8 +880,7 @@ void UsersConfigAccessStorage::load( Settings::checkNoSettingNamesAtTopLevel(*new_config, users_config_path); parseFromConfig(*new_config); access_control.getChangesNotifier().sendNotifications(); - }, - /* already_loaded = */ false); + }); } void UsersConfigAccessStorage::startPeriodicReloading() diff --git a/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp b/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp index 7541d64af4a..1c059dc52aa 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp @@ -59,9 +59,26 @@ struct GroupConcatDataBase data_size += str_size; } + void insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena) + { + WriteBufferFromOwnString buff; + serialization->serializeText(*column, row_num, buff, FormatSettings{}); + auto string = buff.stringView(); + insertChar(string.data(), string.size(), arena); + } + }; -struct GroupConcatData : public GroupConcatDataBase +template +struct GroupConcatData; + +template<> +struct GroupConcatData final : public GroupConcatDataBase +{ +}; + +template<> +struct GroupConcatData final : public GroupConcatDataBase { using Offset = UInt64; using Allocator = MixedAlignedArenaAllocator; @@ -92,7 +109,7 @@ struct GroupConcatData : public GroupConcatDataBase template class GroupConcatImpl final - : public IAggregateFunctionDataHelper> + : public 
IAggregateFunctionDataHelper, GroupConcatImpl> { static constexpr auto name = "groupConcat"; @@ -102,7 +119,7 @@ class GroupConcatImpl final public: GroupConcatImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 limit_, const String & delimiter_) - : IAggregateFunctionDataHelper>( + : IAggregateFunctionDataHelper, GroupConcatImpl>( {data_type_}, parameters_, std::make_shared()) , serialization(this->argument_types[0]->getDefaultSerialization()) , limit(limit_) @@ -162,7 +179,6 @@ public: auto & cur_data = this->data(place); writeVarUInt(cur_data.data_size, buf); - writeVarUInt(cur_data.allocated_size, buf); buf.write(cur_data.data, cur_data.data_size); @@ -178,10 +194,13 @@ public: { auto & cur_data = this->data(place); - readVarUInt(cur_data.data_size, buf); - readVarUInt(cur_data.allocated_size, buf); + UInt64 temp_size = 0; + readVarUInt(temp_size, buf); - buf.readStrict(cur_data.data, cur_data.data_size); + cur_data.checkAndUpdateSize(temp_size, arena); + + buf.readStrict(cur_data.data + cur_data.data_size, temp_size); + cur_data.data_size = temp_size; if constexpr (has_limit) { @@ -198,8 +217,7 @@ public: if (cur_data.data_size == 0) { - auto column_nullable = IColumn::mutate(makeNullable(to.getPtr())); - column_nullable->insertDefault(); + to.insertDefault(); return; } diff --git a/src/AggregateFunctions/AggregateFunctionMaxIntersections.cpp b/src/AggregateFunctions/AggregateFunctionMaxIntersections.cpp index 05ed85a9004..6c26065a918 100644 --- a/src/AggregateFunctions/AggregateFunctionMaxIntersections.cpp +++ b/src/AggregateFunctions/AggregateFunctionMaxIntersections.cpp @@ -91,7 +91,8 @@ public: return std::make_shared>(); } - bool allocatesMemoryInArena() const override { return false; } + /// MaxIntersectionsData::Allocator uses the arena + bool allocatesMemoryInArena() const override { return true; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionDistinct.h b/src/AggregateFunctions/Combinators/AggregateFunctionDistinct.h index 4338dcff5c0..f532858b3d8 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionDistinct.h @@ -228,6 +228,11 @@ public: return prefix_size + nested_func->sizeOfData(); } + size_t alignOfData() const override + { + return std::max(alignof(Data), nested_func->alignOfData()); + } + void create(AggregateDataPtr __restrict place) const override { new (place) Data; diff --git a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp index 8a6276008d8..91186db0e0c 100644 --- a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp +++ b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp @@ -43,50 +43,56 @@ public: bool replaced_argument = false; auto replaced_uniq_function_arguments_nodes = function_node->getArguments().getNodes(); - for (auto & uniq_function_argument_node : replaced_uniq_function_arguments_nodes) + /// Replace injective function with its single argument + auto remove_injective_function = [&replaced_argument](QueryTreeNodePtr & arg) -> bool { - auto * uniq_function_argument_node_typed = uniq_function_argument_node->as(); - if (!uniq_function_argument_node_typed || !uniq_function_argument_node_typed->isOrdinaryFunction()) - continue; - - auto & uniq_function_argument_node_argument_nodes = 
uniq_function_argument_node_typed->getArguments().getNodes(); + auto * arg_typed = arg->as(); + if (!arg_typed || !arg_typed->isOrdinaryFunction()) + return false; /// Do not apply optimization if injective function contains multiple arguments - if (uniq_function_argument_node_argument_nodes.size() != 1) - continue; + auto & arg_arguments_nodes = arg_typed->getArguments().getNodes(); + if (arg_arguments_nodes.size() != 1) + return false; - const auto & uniq_function_argument_node_function = uniq_function_argument_node_typed->getFunction(); - if (!uniq_function_argument_node_function->isInjective({})) - continue; + const auto & arg_function = arg_typed->getFunction(); + if (!arg_function->isInjective({})) + return false; - /// Replace injective function with its single argument - uniq_function_argument_node = uniq_function_argument_node_argument_nodes[0]; - replaced_argument = true; + arg = arg_arguments_nodes[0]; + return replaced_argument = true; + }; + + for (auto & uniq_function_argument_node : replaced_uniq_function_arguments_nodes) + { + while (remove_injective_function(uniq_function_argument_node)) + ; } if (!replaced_argument) return; - DataTypes argument_types; - argument_types.reserve(replaced_uniq_function_arguments_nodes.size()); + DataTypes replaced_argument_types; + replaced_argument_types.reserve(replaced_uniq_function_arguments_nodes.size()); for (const auto & function_node_argument : replaced_uniq_function_arguments_nodes) - argument_types.emplace_back(function_node_argument->getResultType()); + replaced_argument_types.emplace_back(function_node_argument->getResultType()); + auto current_aggregate_function = function_node->getAggregateFunction(); AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get( + auto replaced_aggregate_function = AggregateFunctionFactory::instance().get( function_node->getFunctionName(), NullsAction::EMPTY, - argument_types, - function_node->getAggregateFunction()->getParameters(), + replaced_argument_types, + current_aggregate_function->getParameters(), properties); /// uniqCombined returns nullable with nullable arguments so the result type might change which breaks the pass - if (!aggregate_function->getResultType()->equals(*function_node->getAggregateFunction()->getResultType())) + if (!replaced_aggregate_function->getResultType()->equals(*current_aggregate_function->getResultType())) return; - function_node->getArguments().getNodes() = replaced_uniq_function_arguments_nodes; - function_node->resolveAsAggregateFunction(std::move(aggregate_function)); + function_node->getArguments().getNodes() = std::move(replaced_uniq_function_arguments_nodes); + function_node->resolveAsAggregateFunction(std::move(replaced_aggregate_function)); } }; diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 5e5ecaaa93a..576c4943ccb 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -1,3 +1,5 @@ +#include + #include #include #include @@ -985,18 +987,18 @@ std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded( { result_aggregate_function_name = settings.count_distinct_implementation; } - else if (aggregate_function_name_lowercase == "countdistinctif" || aggregate_function_name_lowercase == "countifdistinct") + else if (aggregate_function_name_lowercase == "countifdistinct" || + (settings.rewrite_count_distinct_if_with_count_distinct_implementation && aggregate_function_name_lowercase == "countdistinctif")) { 
result_aggregate_function_name = settings.count_distinct_implementation; result_aggregate_function_name += "If"; } - - /// Replace aggregateFunctionIfDistinct into aggregateFunctionDistinctIf to make execution more optimal - if (result_aggregate_function_name.ends_with("ifdistinct")) + else if (aggregate_function_name_lowercase.ends_with("ifdistinct")) { + /// Replace aggregateFunctionIfDistinct into aggregateFunctionDistinctIf to make execution more optimal size_t prefix_length = result_aggregate_function_name.size() - strlen("ifdistinct"); result_aggregate_function_name = result_aggregate_function_name.substr(0, prefix_length) + "DistinctIf"; - } + } bool need_add_or_null = settings.aggregate_functions_null_for_empty && !result_aggregate_function_name.ends_with("OrNull"); if (need_add_or_null) @@ -3495,7 +3497,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi * * 4. If node has alias, update its value in scope alias map. Deregister alias from expression_aliases_in_resolve_process. */ -ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool ignore_alias) +ProjectionNames QueryAnalyzer::resolveExpressionNode( + QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool ignore_alias) { checkStackSize(); @@ -4505,7 +4508,36 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node, table_name = table_identifier[1]; } - auto parametrized_view_storage = scope_context->getQueryContext()->buildParametrizedViewStorage(function_ast, database_name, table_name); + /// Collect parametrized view arguments + NameToNameMap view_params; + for (const auto & argument : table_function_node_typed.getArguments()) + { + if (auto * arg_func = argument->as()) + { + if (arg_func->getFunctionName() != "equals") + continue; + + auto nodes = arg_func->getArguments().getNodes(); + if (nodes.size() != 2) + continue; + + if (auto * identifier_node = nodes[0]->as()) + { + resolveExpressionNode(nodes[1], scope, /* allow_lambda_expression */false, /* allow_table_function */false); + if (auto * constant = nodes[1]->as()) + { + view_params[identifier_node->getIdentifier().getFullName()] = convertFieldToString(constant->getValue()); + } + } + } + } + + auto context = scope_context->getQueryContext(); + auto parametrized_view_storage = context->buildParametrizedViewStorage( + database_name, + table_name, + view_params); + if (parametrized_view_storage) { auto fake_table_node = std::make_shared(parametrized_view_storage, scope_context); diff --git a/src/Analyzer/SetUtils.cpp b/src/Analyzer/SetUtils.cpp index ceda264b5a6..0ecb3545225 100644 --- a/src/Analyzer/SetUtils.cpp +++ b/src/Analyzer/SetUtils.cpp @@ -9,6 +9,8 @@ #include #include +#include + namespace DB { @@ -54,8 +56,9 @@ size_t getCompoundTypeDepth(const IDataType & type) } template -Block createBlockFromCollection(const Collection & collection, const DataTypes & block_types, bool transform_null_in) +Block createBlockFromCollection(const Collection & collection, const DataTypes& value_types, const DataTypes & block_types, bool transform_null_in) { + assert(collection.size() == value_types.size()); size_t columns_size = block_types.size(); MutableColumns columns(columns_size); for (size_t i = 0; i < columns_size; ++i) @@ -66,13 +69,17 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes & Row 
tuple_values; - for (const auto & value : collection) + for (size_t collection_index = 0; collection_index < collection.size(); ++collection_index) { + const auto & value = collection[collection_index]; if (columns_size == 1) { - auto field = convertFieldToTypeStrict(value, *block_types[0]); + const DataTypePtr & data_type = value_types[collection_index]; + auto field = convertFieldToTypeStrict(value, *data_type, *block_types[0]); if (!field) + { continue; + } bool need_insert_null = transform_null_in && block_types[0]->isNullable(); if (!field->isNull() || need_insert_null) @@ -87,6 +94,9 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes & value.getTypeName()); const auto & tuple = value.template get(); + const DataTypePtr & value_type = value_types[collection_index]; + const DataTypes & tuple_value_type = typeid_cast(value_type.get())->getElements(); + size_t tuple_size = tuple.size(); if (tuple_size != columns_size) @@ -101,7 +111,7 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes & size_t i = 0; for (; i < tuple_size; ++i) { - auto converted_field = convertFieldToTypeStrict(tuple[i], *block_types[i]); + auto converted_field = convertFieldToTypeStrict(tuple[i], *tuple_value_type[i], *block_types[i]); if (!converted_field) break; tuple_values[i] = std::move(*converted_field); @@ -147,20 +157,28 @@ Block getSetElementsForConstantValue(const DataTypePtr & expression_type, const if (lhs_type_depth == rhs_type_depth) { /// 1 in 1; (1, 2) in (1, 2); identity(tuple(tuple(tuple(1)))) in tuple(tuple(tuple(1))); etc. - Array array{value}; - result_block = createBlockFromCollection(array, set_element_types, transform_null_in); + DataTypes value_types{value_type}; + result_block = createBlockFromCollection(array, value_types, set_element_types, transform_null_in); } else if (lhs_type_depth + 1 == rhs_type_depth) { /// 1 in (1, 2); (1, 2) in ((1, 2), (3, 4)) - WhichDataType rhs_which_type(value_type); if (rhs_which_type.isArray()) - result_block = createBlockFromCollection(value.get(), set_element_types, transform_null_in); + { + const DataTypeArray * value_array_type = assert_cast(value_type.get()); + size_t value_array_size = value.get().size(); + DataTypes value_types(value_array_size, value_array_type->getNestedType()); + result_block = createBlockFromCollection(value.get(), value_types, set_element_types, transform_null_in); + } else if (rhs_which_type.isTuple()) - result_block = createBlockFromCollection(value.get(), set_element_types, transform_null_in); + { + const DataTypeTuple * value_tuple_type = assert_cast(value_type.get()); + const DataTypes & value_types = value_tuple_type->getElements(); + result_block = createBlockFromCollection(value.get(), value_types, set_element_types, transform_null_in); + } else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Unsupported type at the right-side of IN. Expected Array or Tuple. 
Actual {}", diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index 8f32c918c61..3f972c36e47 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -24,8 +24,6 @@ #include #include -#include - namespace ProfileEvents { @@ -93,6 +91,7 @@ BackupImpl::BackupImpl( const std::optional & base_backup_info_, std::shared_ptr reader_, const ContextPtr & context_, + bool is_internal_backup_, bool use_same_s3_credentials_for_base_backup_) : backup_info(backup_info_) , backup_name_for_logging(backup_info.toStringForLogging()) @@ -101,7 +100,7 @@ BackupImpl::BackupImpl( , open_mode(OpenMode::READ) , reader(std::move(reader_)) , context(context_) - , is_internal_backup(false) + , is_internal_backup(is_internal_backup_) , version(INITIAL_BACKUP_VERSION) , base_backup_info(base_backup_info_) , use_same_s3_credentials_for_base_backup(use_same_s3_credentials_for_base_backup_) @@ -256,6 +255,7 @@ std::shared_ptr BackupImpl::getBaseBackupUnlocked() const params.backup_info = *base_backup_info; params.open_mode = OpenMode::READ; params.context = context; + params.is_internal_backup = is_internal_backup; /// use_same_s3_credentials_for_base_backup should be inherited for base backups params.use_same_s3_credentials_for_base_backup = use_same_s3_credentials_for_base_backup; diff --git a/src/Backups/BackupImpl.h b/src/Backups/BackupImpl.h index 6fed5fe758b..2b27e2ab090 100644 --- a/src/Backups/BackupImpl.h +++ b/src/Backups/BackupImpl.h @@ -40,6 +40,7 @@ public: const std::optional & base_backup_info_, std::shared_ptr reader_, const ContextPtr & context_, + bool is_internal_backup_, bool use_same_s3_credentials_for_base_backup_); BackupImpl( diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index 1a3fdf58cc4..454a0468e9f 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -438,7 +438,7 @@ void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_ String create_table_query_str = serializeAST(*create_table_query); bool is_predefined_table = DatabaseCatalog::instance().isPredefinedTable(StorageID{table_name.database, table_name.table}); - auto table_dependencies = getDependenciesFromCreateQuery(context, table_name, create_table_query); + auto table_dependencies = getDependenciesFromCreateQuery(context, table_name, create_table_query, context->getCurrentDatabase()); bool table_has_data = backup->hasFiles(data_path_in_backup); std::lock_guard lock{mutex}; diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 81e3c104da1..03d156d1009 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -153,6 +153,7 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) params.base_backup_info, reader, params.context, + params.is_internal_backup, /* use_same_s3_credentials_for_base_backup*/ false); } else diff --git a/src/Backups/registerBackupEngineS3.cpp b/src/Backups/registerBackupEngineS3.cpp index c34dbe273f5..59ed9506af0 100644 --- a/src/Backups/registerBackupEngineS3.cpp +++ b/src/Backups/registerBackupEngineS3.cpp @@ -119,6 +119,7 @@ void registerBackupEngineS3(BackupFactory & factory) params.base_backup_info, reader, params.context, + params.is_internal_backup, params.use_same_s3_credentials_for_base_backup); } else diff --git a/src/Backups/registerBackupEnginesFileAndDisk.cpp b/src/Backups/registerBackupEnginesFileAndDisk.cpp index 
c633ebb6a5a..35263d39cba 100644 --- a/src/Backups/registerBackupEnginesFileAndDisk.cpp +++ b/src/Backups/registerBackupEnginesFileAndDisk.cpp @@ -177,6 +177,7 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory) params.base_backup_info, reader, params.context, + params.is_internal_backup, params.use_same_s3_credentials_for_base_backup); } else diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 84aaec17a5b..79352bc7258 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -222,7 +222,7 @@ add_object_library(clickhouse_storages_mergetree Storages/MergeTree) add_object_library(clickhouse_storages_statistics Storages/Statistics) add_object_library(clickhouse_storages_liveview Storages/LiveView) add_object_library(clickhouse_storages_windowview Storages/WindowView) -add_object_library(clickhouse_storages_s3queue Storages/S3Queue) +add_object_library(clickhouse_storages_s3queue Storages/ObjectStorageQueue) add_object_library(clickhouse_storages_materializedview Storages/MaterializedView) add_object_library(clickhouse_client Client) add_object_library(clickhouse_bridge BridgeHelper) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 854cc3fef8b..56573c15f32 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -44,13 +44,12 @@ #include #include #include -#include #include +#include #include #include #include -#include #include #include #include @@ -110,6 +109,7 @@ namespace ErrorCodes extern const int USER_SESSION_LIMIT_EXCEEDED; extern const int NOT_IMPLEMENTED; extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; + extern const int USER_EXPIRED; } } @@ -302,8 +302,29 @@ public: ClientBase::~ClientBase() = default; -ClientBase::ClientBase() = default; - +ClientBase::ClientBase( + int in_fd_, + int out_fd_, + int err_fd_, + std::istream & input_stream_, + std::ostream & output_stream_, + std::ostream & error_stream_ +) + : std_in(in_fd_) + , std_out(out_fd_) + , progress_indication(output_stream_, in_fd_, err_fd_) + , in_fd(in_fd_) + , out_fd(out_fd_) + , err_fd(err_fd_) + , input_stream(input_stream_) + , output_stream(output_stream_) + , error_stream(error_stream_) +{ + stdin_is_a_tty = isatty(in_fd); + stdout_is_a_tty = isatty(out_fd); + stderr_is_a_tty = isatty(err_fd); + terminal_width = getTerminalWidth(in_fd, err_fd); +} void ClientBase::setupSignalHandler() { @@ -330,7 +351,7 @@ void ClientBase::setupSignalHandler() } -ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements, bool is_interactive, bool ignore_error) +ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements) { std::unique_ptr parser; ASTPtr res; @@ -359,7 +380,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Setting if (!res) { - std::cerr << std::endl << message << std::endl << std::endl; + error_stream << std::endl << message << std::endl << std::endl; return nullptr; } } @@ -373,11 +394,11 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Setting if (is_interactive) { - std::cout << std::endl; - WriteBufferFromOStream res_buf(std::cout, 4096); + output_stream << std::endl; + WriteBufferFromOStream res_buf(output_stream, 4096); formatAST(*res, res_buf); res_buf.finalize(); - std::cout << std::endl << std::endl; + output_stream << std::endl << std::endl; } return res; @@ -481,7 +502,7 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query) if (need_render_progress && 
tty_buf) { if (select_into_file && !select_into_file_and_stdout) - std::cerr << "\r"; + error_stream << "\r"; progress_indication.writeProgress(*tty_buf); } } @@ -741,17 +762,17 @@ bool ClientBase::isRegularFile(int fd) void ClientBase::setDefaultFormatsAndCompressionFromConfiguration() { - if (config().has("output-format")) + if (getClientConfiguration().has("output-format")) { - default_output_format = config().getString("output-format"); + default_output_format = getClientConfiguration().getString("output-format"); is_default_format = false; } - else if (config().has("format")) + else if (getClientConfiguration().has("format")) { - default_output_format = config().getString("format"); + default_output_format = getClientConfiguration().getString("format"); is_default_format = false; } - else if (config().has("vertical")) + else if (getClientConfiguration().has("vertical")) { default_output_format = "Vertical"; is_default_format = false; @@ -777,17 +798,17 @@ void ClientBase::setDefaultFormatsAndCompressionFromConfiguration() default_output_format = "TSV"; } - if (config().has("input-format")) + if (getClientConfiguration().has("input-format")) { - default_input_format = config().getString("input-format"); + default_input_format = getClientConfiguration().getString("input-format"); } - else if (config().has("format")) + else if (getClientConfiguration().has("format")) { - default_input_format = config().getString("format"); + default_input_format = getClientConfiguration().getString("format"); } - else if (config().getString("table-file", "-") != "-") + else if (getClientConfiguration().getString("table-file", "-") != "-") { - auto file_name = config().getString("table-file"); + auto file_name = getClientConfiguration().getString("table-file"); std::optional format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(file_name); if (format_from_file_name) default_input_format = *format_from_file_name; @@ -803,7 +824,7 @@ void ClientBase::setDefaultFormatsAndCompressionFromConfiguration() default_input_format = "TSV"; } - format_max_block_size = config().getUInt64("format_max_block_size", + format_max_block_size = getClientConfiguration().getUInt64("format_max_block_size", global_context->getSettingsRef().max_block_size); /// Setting value from cmd arg overrides one from config @@ -813,7 +834,7 @@ void ClientBase::setDefaultFormatsAndCompressionFromConfiguration() } else { - insert_format_max_block_size = config().getUInt64("insert_format_max_block_size", + insert_format_max_block_size = getClientConfiguration().getUInt64("insert_format_max_block_size", global_context->getSettingsRef().max_insert_block_size); } } @@ -924,9 +945,7 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) const char * begin = full_query.data(); auto parsed_query = parseQuery(begin, begin + full_query.size(), global_context->getSettingsRef(), - /*allow_multi_statements=*/ false, - is_interactive, - ignore_error); + /*allow_multi_statements=*/ false); if (!parsed_query) return; @@ -1100,7 +1119,7 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa /// has been received yet. 
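// Illustrative sketch (not part of the patch): the branch below performs a bounded retry --
// the query is re-sent only while no rows have been received yet and the server error is a
// known transient one. A simplified stand-alone version of that control flow; run_query,
// is_transient and kMaxRetries are hypothetical names, not the real client API.
#include <cstddef>
#include <ostream>
#include <stdexcept>

bool run_query();                                // assumed to throw std::runtime_error on failure
bool is_transient(const std::runtime_error &);   // assumed classifier for retryable server errors

bool run_with_retries(std::ostream & err, size_t kMaxRetries = 10)
{
    for (size_t retries_left = kMaxRetries; retries_left != 0; --retries_left)
    {
        try
        {
            return run_query();
        }
        catch (const std::runtime_error & e)
        {
            // Give up when the error is not transient or the retry budget is exhausted.
            if (!is_transient(e) || retries_left == 1)
                throw;
            err << "Got a transient error from the server, will retry ("
                << (retries_left - 1) << " retries left)\n";
        }
    }
    return false; // unreachable when kMaxRetries > 0
}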
if (processed_rows == 0 && e.code() == ErrorCodes::DEADLOCK_AVOIDED && --retries_left) { - std::cerr << "Got a transient error from the server, will" + error_stream << "Got a transient error from the server, will" << " retry (" << retries_left << " retries left)"; } else @@ -1154,7 +1173,7 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b double elapsed = receive_watch.elapsedSeconds(); if (break_on_timeout && elapsed > receive_timeout.totalSeconds()) { - std::cout << "Timeout exceeded while receiving data from server." + output_stream << "Timeout exceeded while receiving data from server." << " Waited for " << static_cast(elapsed) << " seconds," << " timeout is " << receive_timeout.totalSeconds() << " seconds." << std::endl; @@ -1189,7 +1208,7 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b if (cancelled && is_interactive) { - std::cout << "Query was cancelled." << std::endl; + output_stream << "Query was cancelled." << std::endl; cancelled_printed = true; } } @@ -1308,9 +1327,9 @@ void ClientBase::onEndOfStream() if (is_interactive) { if (cancelled && !cancelled_printed) - std::cout << "Query was cancelled." << std::endl; + output_stream << "Query was cancelled." << std::endl; else if (!written_first_block) - std::cout << "Ok." << std::endl; + output_stream << "Ok." << std::endl; } } @@ -1863,7 +1882,7 @@ void ClientBase::cancelQuery() progress_indication.clearProgressOutput(*tty_buf); if (is_interactive) - std::cout << "Cancelling query." << std::endl; + output_stream << "Cancelling query." << std::endl; cancelled = true; } @@ -2026,7 +2045,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin { const String & new_database = use_query->getDatabase(); /// If the client initiates the reconnection, it takes the settings from the config. - config().setString("database", new_database); + getClientConfiguration().setString("database", new_database); /// If the connection initiates the reconnection, it uses its variable. connection->setDefaultDatabase(new_database); } @@ -2046,21 +2065,21 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin if (is_interactive) { - std::cout << std::endl; + output_stream << std::endl; if (!server_exception || processed_rows != 0) - std::cout << processed_rows << " row" << (processed_rows == 1 ? "" : "s") << " in set. "; - std::cout << "Elapsed: " << progress_indication.elapsedSeconds() << " sec. "; + output_stream << processed_rows << " row" << (processed_rows == 1 ? "" : "s") << " in set. "; + output_stream << "Elapsed: " << progress_indication.elapsedSeconds() << " sec. 
"; progress_indication.writeFinalProgress(); - std::cout << std::endl << std::endl; + output_stream << std::endl << std::endl; } - else if (print_time_to_stderr) + else if (getClientConfiguration().getBool("print-time-to-stderr", false)) { - std::cerr << progress_indication.elapsedSeconds() << "\n"; + error_stream << progress_indication.elapsedSeconds() << "\n"; } - if (!is_interactive && print_num_processed_rows) + if (!is_interactive && getClientConfiguration().getBool("print-num-processed-rows", false)) { - std::cout << "Processed rows: " << processed_rows << "\n"; + output_stream << "Processed rows: " << processed_rows << "\n"; } if (have_error && report_error) @@ -2110,9 +2129,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( { parsed_query = parseQuery(this_query_end, all_queries_end, global_context->getSettingsRef(), - /*allow_multi_statements=*/ true, - is_interactive, - ignore_error); + /*allow_multi_statements=*/ true); } catch (const Exception & e) { @@ -2271,7 +2288,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) catch (...) { // Surprisingly, this is a client error. A server error would - // have been reported without throwing (see onReceiveSeverException()). + // have been reported without throwing (see onReceiveExceptionFromServer()). client_exception = std::make_unique(getCurrentExceptionMessageAndPattern(print_stack_trace), getCurrentExceptionCode()); have_error = true; } @@ -2428,12 +2445,12 @@ void ClientBase::initQueryIdFormats() return; /// Initialize query_id_formats if any - if (config().has("query_id_formats")) + if (getClientConfiguration().has("query_id_formats")) { Poco::Util::AbstractConfiguration::Keys keys; - config().keys("query_id_formats", keys); + getClientConfiguration().keys("query_id_formats", keys); for (const auto & name : keys) - query_id_formats.emplace_back(name + ":", config().getString("query_id_formats." + name)); + query_id_formats.emplace_back(name + ":", getClientConfiguration().getString("query_id_formats." + name)); } if (query_id_formats.empty()) @@ -2478,9 +2495,9 @@ bool ClientBase::addMergeTreeSettings(ASTCreateQuery & ast_create) void ClientBase::runInteractive() { - if (config().has("query_id")) + if (getClientConfiguration().has("query_id")) throw Exception(ErrorCodes::BAD_ARGUMENTS, "query_id could be specified only in non-interactive mode"); - if (print_time_to_stderr) + if (getClientConfiguration().getBool("print-time-to-stderr", false)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "time option could be specified only in non-interactive mode"); initQueryIdFormats(); @@ -2493,9 +2510,9 @@ void ClientBase::runInteractive() { /// Load suggestion data from the server. if (global_context->getApplicationType() == Context::ApplicationType::CLIENT) - suggest->load(global_context, connection_parameters, config().getInt("suggestion_limit"), wait_for_suggestions_to_load); + suggest->load(global_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load); else if (global_context->getApplicationType() == Context::ApplicationType::LOCAL) - suggest->load(global_context, connection_parameters, config().getInt("suggestion_limit"), wait_for_suggestions_to_load); + suggest->load(global_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load); } if (home_path.empty()) @@ -2506,8 +2523,8 @@ void ClientBase::runInteractive() } /// Load command history if present. 
- if (config().has("history_file")) - history_file = config().getString("history_file"); + if (getClientConfiguration().has("history_file")) + history_file = getClientConfiguration().getString("history_file"); else { auto * history_file_from_env = getenv("CLICKHOUSE_HISTORY_FILE"); // NOLINT(concurrency-mt-unsafe) @@ -2528,7 +2545,7 @@ void ClientBase::runInteractive() { if (e.getErrno() != EEXIST) { - std::cerr << getCurrentExceptionMessage(false) << '\n'; + error_stream << getCurrentExceptionMessage(false) << '\n'; } } } @@ -2539,13 +2556,13 @@ void ClientBase::runInteractive() #if USE_REPLXX replxx::Replxx::highlighter_callback_t highlight_callback{}; - if (config().getBool("highlight", true)) + if (getClientConfiguration().getBool("highlight", true)) highlight_callback = highlight; ReplxxLineReader lr( *suggest, history_file, - config().has("multiline"), + getClientConfiguration().has("multiline"), query_extenders, query_delimiters, word_break_characters, @@ -2553,7 +2570,7 @@ void ClientBase::runInteractive() #else LineReader lr( history_file, - config().has("multiline"), + getClientConfiguration().has("multiline"), query_extenders, query_delimiters, word_break_characters); @@ -2633,7 +2650,7 @@ void ClientBase::runInteractive() { // If a separate connection loading suggestions failed to open a new session, // use the main session to receive them. - suggest->load(*connection, connection_parameters.timeouts, config().getInt("suggestion_limit"), global_context->getClientInfo()); + suggest->load(*connection, connection_parameters.timeouts, getClientConfiguration().getInt("suggestion_limit"), global_context->getClientInfo()); } try @@ -2644,8 +2661,11 @@ void ClientBase::runInteractive() } catch (const Exception & e) { + if (e.code() == ErrorCodes::USER_EXPIRED) + break; + /// We don't need to handle the test hints in the interactive mode. - std::cerr << "Exception on client:" << std::endl << getExceptionMessage(e, print_stack_trace, true) << std::endl << std::endl; + error_stream << "Exception on client:" << std::endl << getExceptionMessage(e, print_stack_trace, true) << std::endl << std::endl; client_exception.reset(e.clone()); } @@ -2662,11 +2682,11 @@ void ClientBase::runInteractive() while (true); if (isNewYearMode()) - std::cout << "Happy new year." << std::endl; + output_stream << "Happy new year." << std::endl; else if (isChineseNewYearMode(local_tz)) - std::cout << "Happy Chinese new year. 春节快乐!" << std::endl; + output_stream << "Happy Chinese new year. 春节快乐!" << std::endl; else - std::cout << "Bye." << std::endl; + output_stream << "Bye." << std::endl; } @@ -2677,7 +2697,7 @@ bool ClientBase::processMultiQueryFromFile(const String & file_name) ReadBufferFromFile in(file_name); readStringUntilEOF(queries_from_file, in); - if (!has_log_comment) + if (!getClientConfiguration().has("log_comment")) { Settings settings = global_context->getSettings(); /// NOTE: cannot use even weakly_canonical() since it fails for /dev/stdin due to resolving of "pipe:[X]" @@ -2786,13 +2806,13 @@ void ClientBase::clearTerminal() /// It is needed if garbage is left in terminal. /// Show cursor. It can be left hidden by invocation of previous programs. /// A test for this feature: perl -e 'print "x"x100000'; echo -ne '\033[0;0H\033[?25l'; clickhouse-client - std::cout << "\033[0J" "\033[?25h"; + output_stream << "\033[0J" "\033[?25h"; } void ClientBase::showClientVersion() { - std::cout << VERSION_NAME << " " + getName() + " version " << VERSION_STRING << VERSION_OFFICIAL << "." 
<< std::endl; + output_stream << VERSION_NAME << " " + getName() + " version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; } namespace @@ -2859,7 +2879,10 @@ private: } - +/// Enable optimizations even in debug builds because otherwise options parsing becomes extremely slow affecting .sh tests +#if defined(__clang__) +#pragma clang optimize on +#endif void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) { if (allow_repeated_settings) @@ -3077,18 +3100,18 @@ void ClientBase::init(int argc, char ** argv) if (options.count("version-clean")) { - std::cout << VERSION_STRING; + output_stream << VERSION_STRING; exit(0); // NOLINT(concurrency-mt-unsafe) } if (options.count("verbose")) - config().setBool("verbose", true); + getClientConfiguration().setBool("verbose", true); /// Output of help message. if (options.count("help") || (options.count("host") && options["host"].as() == "elp")) /// If user writes -help instead of --help. { - if (config().getBool("verbose", false)) + if (getClientConfiguration().getBool("verbose", false)) printHelpMessage(options_description, true); else printHelpMessage(options_description_non_verbose, false); @@ -3096,72 +3119,75 @@ void ClientBase::init(int argc, char ** argv) } /// Common options for clickhouse-client and clickhouse-local. + + /// Output execution time to stderr in batch mode. if (options.count("time")) - print_time_to_stderr = true; + getClientConfiguration().setBool("print-time-to-stderr", true); if (options.count("query")) queries = options["query"].as>(); if (options.count("query_id")) - config().setString("query_id", options["query_id"].as()); + getClientConfiguration().setString("query_id", options["query_id"].as()); if (options.count("database")) - config().setString("database", options["database"].as()); + getClientConfiguration().setString("database", options["database"].as()); if (options.count("config-file")) - config().setString("config-file", options["config-file"].as()); + getClientConfiguration().setString("config-file", options["config-file"].as()); if (options.count("queries-file")) queries_files = options["queries-file"].as>(); if (options.count("multiline")) - config().setBool("multiline", true); + getClientConfiguration().setBool("multiline", true); if (options.count("multiquery")) - config().setBool("multiquery", true); + getClientConfiguration().setBool("multiquery", true); if (options.count("ignore-error")) - config().setBool("ignore-error", true); + getClientConfiguration().setBool("ignore-error", true); if (options.count("format")) - config().setString("format", options["format"].as()); + getClientConfiguration().setString("format", options["format"].as()); if (options.count("output-format")) - config().setString("output-format", options["output-format"].as()); + getClientConfiguration().setString("output-format", options["output-format"].as()); if (options.count("vertical")) - config().setBool("vertical", true); + getClientConfiguration().setBool("vertical", true); if (options.count("stacktrace")) - config().setBool("stacktrace", true); + getClientConfiguration().setBool("stacktrace", true); if (options.count("print-profile-events")) - config().setBool("print-profile-events", true); + getClientConfiguration().setBool("print-profile-events", true); if (options.count("profile-events-delay-ms")) - config().setUInt64("profile-events-delay-ms", options["profile-events-delay-ms"].as()); + 
getClientConfiguration().setUInt64("profile-events-delay-ms", options["profile-events-delay-ms"].as()); + /// Whether to print the number of processed rows at if (options.count("processed-rows")) - print_num_processed_rows = true; + getClientConfiguration().setBool("print-num-processed-rows", true); if (options.count("progress")) { switch (options["progress"].as()) { case DEFAULT: - config().setString("progress", "default"); + getClientConfiguration().setString("progress", "default"); break; case OFF: - config().setString("progress", "off"); + getClientConfiguration().setString("progress", "off"); break; case TTY: - config().setString("progress", "tty"); + getClientConfiguration().setString("progress", "tty"); break; case ERR: - config().setString("progress", "err"); + getClientConfiguration().setString("progress", "err"); break; } } if (options.count("echo")) - config().setBool("echo", true); + getClientConfiguration().setBool("echo", true); if (options.count("disable_suggestion")) - config().setBool("disable_suggestion", true); + getClientConfiguration().setBool("disable_suggestion", true); if (options.count("wait_for_suggestions_to_load")) - config().setBool("wait_for_suggestions_to_load", true); + getClientConfiguration().setBool("wait_for_suggestions_to_load", true); if (options.count("suggestion_limit")) - config().setInt("suggestion_limit", options["suggestion_limit"].as()); + getClientConfiguration().setInt("suggestion_limit", options["suggestion_limit"].as()); if (options.count("highlight")) - config().setBool("highlight", options["highlight"].as()); + getClientConfiguration().setBool("highlight", options["highlight"].as()); if (options.count("history_file")) - config().setString("history_file", options["history_file"].as()); + getClientConfiguration().setString("history_file", options["history_file"].as()); if (options.count("interactive")) - config().setBool("interactive", true); + getClientConfiguration().setBool("interactive", true); if (options.count("pager")) - config().setString("pager", options["pager"].as()); + getClientConfiguration().setString("pager", options["pager"].as()); if (options.count("log-level")) Poco::Logger::root().setLevel(options["log-level"].as()); @@ -3179,13 +3205,13 @@ void ClientBase::init(int argc, char ** argv) alias_names.reserve(options_description.main_description->options().size()); for (const auto& option : options_description.main_description->options()) alias_names.insert(option->long_name()); - argsToConfig(common_arguments, config(), 100, &alias_names); + argsToConfig(common_arguments, getClientConfiguration(), 100, &alias_names); } clearPasswordFromCommandLine(argc, argv); /// Limit on total memory usage - std::string max_client_memory_usage = config().getString("max_memory_usage_in_client", "0" /*default value*/); + std::string max_client_memory_usage = getClientConfiguration().getString("max_memory_usage_in_client", "0" /*default value*/); if (max_client_memory_usage != "0") { UInt64 max_client_memory_usage_int = parseWithSizeSuffix(max_client_memory_usage.c_str(), max_client_memory_usage.length()); @@ -3194,8 +3220,6 @@ void ClientBase::init(int argc, char ** argv) total_memory_tracker.setDescription("(total)"); total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking); } - - has_log_comment = config().has("log_comment"); } } diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 220fcddc038..756400137ad 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -18,7 +18,6 @@ #include #include - namespace 
po = boost::program_options; @@ -67,13 +66,22 @@ class ClientBase : public Poco::Util::Application, public IHints<2> public: using Arguments = std::vector; - ClientBase(); + explicit ClientBase + ( + int in_fd_ = STDIN_FILENO, + int out_fd_ = STDOUT_FILENO, + int err_fd_ = STDERR_FILENO, + std::istream & input_stream_ = std::cin, + std::ostream & output_stream_ = std::cout, + std::ostream & error_stream_ = std::cerr + ); + ~ClientBase() override; void init(int argc, char ** argv); std::vector getAllRegisteredNames() const override { return cmd_options; } - static ASTPtr parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements, bool is_interactive, bool ignore_error); + ASTPtr parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements); protected: void runInteractive(); @@ -82,6 +90,9 @@ protected: char * argv0 = nullptr; void runLibFuzzer(); + /// This is the analogue of Poco::Application::config() + virtual Poco::Util::LayeredConfiguration & getClientConfiguration() = 0; + virtual bool processWithFuzzing(const String &) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Query processing with fuzzing is not implemented"); @@ -107,7 +118,7 @@ protected: String & query_to_execute, ASTPtr & parsed_query, const String & all_queries_text, std::unique_ptr & current_exception); - static void clearTerminal(); + void clearTerminal(); void showClientVersion(); using ProgramOptionsDescription = boost::program_options::options_description; @@ -129,6 +140,7 @@ protected: const std::vector & hosts_and_ports_arguments) = 0; virtual void processConfig() = 0; + /// Returns true if query processing was successful. bool processQueryText(const String & text); virtual void readArguments( @@ -205,7 +217,6 @@ protected: bool echo_queries = false; /// Print queries before execution in batch mode. bool ignore_error = false; /// In case of errors, don't print error message, continue to next query. Only applicable for non-interactive mode. - bool print_time_to_stderr = false; /// Output execution time to stderr in batch mode. std::optional suggest; bool load_suggestions = false; @@ -250,9 +261,9 @@ protected: ConnectionParameters connection_parameters; /// Buffer that reads from stdin in batch mode. - ReadBufferFromFileDescriptor std_in{STDIN_FILENO}; + ReadBufferFromFileDescriptor std_in; /// Console output. - WriteBufferFromFileDescriptor std_out{STDOUT_FILENO}; + WriteBufferFromFileDescriptor std_out; std::unique_ptr pager_cmd; /// The user can specify to redirect query output to a file. @@ -283,7 +294,6 @@ protected: bool need_render_profile_events = true; bool written_first_block = false; size_t processed_rows = 0; /// How many rows have been read or written. - bool print_num_processed_rows = false; /// Whether to print the number of processed rows at bool print_stack_trace = false; /// The last exception that was received from the server. Is used for the @@ -331,8 +341,14 @@ protected: bool cancelled = false; bool cancelled_printed = false; - /// Does log_comment has specified by user? - bool has_log_comment = false; + /// Unpacked descriptors and streams for the ease of use. 
+ int in_fd = STDIN_FILENO; + int out_fd = STDOUT_FILENO; + int err_fd = STDERR_FILENO; + std::istream & input_stream; + std::ostream & output_stream; + std::ostream & error_stream; + }; } diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 19cd8cc4ee5..799c7511982 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -37,6 +38,7 @@ #include #include +#include #include "config.h" #if USE_SSL @@ -68,12 +70,23 @@ namespace ErrorCodes extern const int EMPTY_DATA_PASSED; } -Connection::~Connection() = default; +Connection::~Connection() +{ + try{ + if (connected) + Connection::disconnect(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} Connection::Connection(const String & host_, UInt16 port_, const String & default_database_, const String & user_, const String & password_, [[maybe_unused]] const SSHKey & ssh_private_key_, + const String & jwt_, const String & quota_key_, const String & cluster_, const String & cluster_secret_, @@ -86,6 +99,7 @@ Connection::Connection(const String & host_, UInt16 port_, , ssh_private_key(ssh_private_key_) #endif , quota_key(quota_key_) + , jwt(jwt_) , cluster(cluster_) , cluster_secret(cluster_secret_) , client_name(client_name_) @@ -257,13 +271,31 @@ void Connection::connect(const ConnectionTimeouts & timeouts) void Connection::disconnect() { - maybe_compressed_out = nullptr; in = nullptr; last_input_packet_type.reset(); std::exception_ptr finalize_exception; + + try + { + // finalize() can write and throw an exception. + if (maybe_compressed_out) + maybe_compressed_out->finalize(); + } + catch (...) + { + /// Don't throw an exception here, it will leave Connection in invalid state. + finalize_exception = std::current_exception(); + + if (out) + { + out->cancel(); + out = nullptr; + } + } + maybe_compressed_out = nullptr; + try { - // finalize() can write to socket and throw an exception. 
if (out) out->finalize(); } @@ -276,6 +308,7 @@ void Connection::disconnect() if (socket) socket->close(); + socket = nullptr; connected = false; nonce.reset(); @@ -341,6 +374,11 @@ void Connection::sendHello() performHandshakeForSSHAuth(); } #endif + else if (!jwt.empty()) + { + writeStringBinary(EncodedUserInfo::JWT_AUTHENTICAION_MARKER, *out); + writeStringBinary(jwt, *out); + } else { writeStringBinary(user, *out); @@ -767,6 +805,8 @@ void Connection::sendQuery( } maybe_compressed_in.reset(); + if (maybe_compressed_out && maybe_compressed_out != out) + maybe_compressed_out->cancel(); maybe_compressed_out.reset(); block_in.reset(); block_logs_in.reset(); @@ -1310,6 +1350,7 @@ ServerConnectionPtr Connection::createConnection(const ConnectionParameters & pa parameters.user, parameters.password, parameters.ssh_private_key, + parameters.jwt, parameters.quota_key, "", /* cluster */ "", /* cluster_secret */ diff --git a/src/Client/Connection.h b/src/Client/Connection.h index 9632eb9d948..0f4b3e436df 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -53,6 +53,7 @@ public: const String & default_database_, const String & user_, const String & password_, const SSHKey & ssh_private_key_, + const String & jwt_, const String & quota_key_, const String & cluster_, const String & cluster_secret_, @@ -173,6 +174,7 @@ private: SSHKey ssh_private_key; #endif String quota_key; + String jwt; /// For inter-server authorization String cluster; diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index 774f3375f63..303bebc30d2 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -52,31 +52,11 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati /// changed the default value to "default" to fix the issue when the user in the prompt is blank user = config.getString("user", "default"); - if (!config.has("ssh-key-file")) + if (config.has("jwt")) { - bool password_prompt = false; - if (config.getBool("ask-password", false)) - { - if (config.has("password")) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specified both --password and --ask-password. Remove one of them"); - password_prompt = true; - } - else - { - password = config.getString("password", ""); - /// if the value of --password is omitted, the password will be set implicitly to "\n" - if (password == ASK_PASSWORD) - password_prompt = true; - } - if (password_prompt) - { - std::string prompt{"Password for user (" + user + "): "}; - char buf[1000] = {}; - if (auto * result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0)) - password = result; - } + jwt = config.getString("jwt"); } - else + else if (config.has("ssh-key-file")) { #if USE_SSH std::string filename = config.getString("ssh-key-file"); @@ -102,6 +82,30 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); #endif } + else + { + bool password_prompt = false; + if (config.getBool("ask-password", false)) + { + if (config.has("password")) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specified both --password and --ask-password. 
Remove one of them"); + password_prompt = true; + } + else + { + password = config.getString("password", ""); + /// if the value of --password is omitted, the password will be set implicitly to "\n" + if (password == ASK_PASSWORD) + password_prompt = true; + } + if (password_prompt) + { + std::string prompt{"Password for user (" + user + "): "}; + char buf[1000] = {}; + if (auto * result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0)) + password = result; + } + } quota_key = config.getString("quota_key", ""); @@ -139,7 +143,7 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati } UInt16 ConnectionParameters::getPortFromConfig(const Poco::Util::AbstractConfiguration & config, - std::string connection_host) + const std::string & connection_host) { bool is_secure = enableSecureConnection(config, connection_host); return config.getInt("port", diff --git a/src/Client/ConnectionParameters.h b/src/Client/ConnectionParameters.h index f23522d48b3..c305c7813f2 100644 --- a/src/Client/ConnectionParameters.h +++ b/src/Client/ConnectionParameters.h @@ -22,6 +22,7 @@ struct ConnectionParameters std::string password; std::string quota_key; SSHKey ssh_private_key; + std::string jwt; Protocol::Secure security = Protocol::Secure::Disable; Protocol::Compression compression = Protocol::Compression::Enable; ConnectionTimeouts timeouts; @@ -30,7 +31,7 @@ struct ConnectionParameters ConnectionParameters(const Poco::Util::AbstractConfiguration & config, std::string host); ConnectionParameters(const Poco::Util::AbstractConfiguration & config, std::string host, std::optional port); - static UInt16 getPortFromConfig(const Poco::Util::AbstractConfiguration & config, std::string connection_host); + static UInt16 getPortFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & connection_host); /// Ask to enter the user's password if password option contains this value. /// "\n" is used because there is hardly a chance that a user would use '\n' as password. diff --git a/src/Client/ConnectionPool.h b/src/Client/ConnectionPool.h index d35c2552461..725a5e91ac0 100644 --- a/src/Client/ConnectionPool.h +++ b/src/Client/ConnectionPool.h @@ -123,7 +123,7 @@ protected: { return std::make_shared( host, port, - default_database, user, password, SSHKey(), quota_key, + default_database, user, password, SSHKey(), /*jwt*/ "", quota_key, cluster, cluster_secret, client_name, compression, secure); } diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index fb4d9a6bdcc..8c993f906e0 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -255,6 +255,17 @@ void HedgedConnections::sendCancel() if (!sent_query || cancelled) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot cancel. Either no query sent or already cancelled."); + /// All hedged connections should be stopped, since otherwise before the + /// HedgedConnectionsFactory will be destroyed (that will happen from + /// QueryPipeline dtor) they could still do some work. + /// And not only this does not make sense, but it also could lead to + /// use-after-free of the current_thread, since the thread from which they + /// had been created differs from the thread where the dtor of + /// QueryPipeline will be called and the initial thread could be already + /// destroyed (especially when the system is under pressure). 
+ if (hedged_connections_factory.hasEventsInProcess()) + hedged_connections_factory.stopChoosingReplicas(); + cancelled = true; for (auto & offset_status : offset_states) diff --git a/src/Client/LineReader.cpp b/src/Client/LineReader.cpp index b3559657ced..487ef232fdc 100644 --- a/src/Client/LineReader.cpp +++ b/src/Client/LineReader.cpp @@ -23,14 +23,6 @@ void trim(String & s) s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end()); } -/// Check if multi-line query is inserted from the paste buffer. -/// Allows delaying the start of query execution until the entirety of query is inserted. -bool hasInputData() -{ - pollfd fd{STDIN_FILENO, POLLIN, 0}; - return poll(&fd, 1, 0) == 1; -} - struct NoCaseCompare { bool operator()(const std::string & str1, const std::string & str2) @@ -63,6 +55,14 @@ void addNewWords(Words & to, const Words & from, Compare comp) namespace DB { +/// Check if multi-line query is inserted from the paste buffer. +/// Allows delaying the start of query execution until the entirety of query is inserted. +bool LineReader::hasInputData() const +{ + pollfd fd{in_fd, POLLIN, 0}; + return poll(&fd, 1, 0) == 1; +} + replxx::Replxx::completions_t LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length, const char * word_break_characters) { std::string_view last_word; @@ -131,11 +131,22 @@ void LineReader::Suggest::addWords(Words && new_words) // NOLINT(cppcoreguidelin } } -LineReader::LineReader(const String & history_file_path_, bool multiline_, Patterns extenders_, Patterns delimiters_) +LineReader::LineReader( + const String & history_file_path_, + bool multiline_, + Patterns extenders_, + Patterns delimiters_, + std::istream & input_stream_, + std::ostream & output_stream_, + int in_fd_ +) : history_file_path(history_file_path_) , multiline(multiline_) , extenders(std::move(extenders_)) , delimiters(std::move(delimiters_)) + , input_stream(input_stream_) + , output_stream(output_stream_) + , in_fd(in_fd_) { /// FIXME: check extender != delimiter } @@ -212,9 +223,9 @@ LineReader::InputStatus LineReader::readOneLine(const String & prompt) input.clear(); { - std::cout << prompt; - std::getline(std::cin, input); - if (!std::cin.good()) + output_stream << prompt; + std::getline(input_stream, input); + if (!input_stream.good()) return ABORT; } diff --git a/src/Client/LineReader.h b/src/Client/LineReader.h index fc19eaa5667..0172bd7ec22 100644 --- a/src/Client/LineReader.h +++ b/src/Client/LineReader.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include #include @@ -37,7 +39,16 @@ public: using Patterns = std::vector; - LineReader(const String & history_file_path, bool multiline, Patterns extenders, Patterns delimiters); + LineReader( + const String & history_file_path, + bool multiline, + Patterns extenders, + Patterns delimiters, + std::istream & input_stream_ = std::cin, + std::ostream & output_stream_ = std::cout, + int in_fd_ = STDIN_FILENO + ); + virtual ~LineReader() = default; /// Reads the whole line until delimiter (in multiline mode) or until the last line without extender. 
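For illustration, the hasInputData() check that becomes a LineReader member above is a non-blocking poll() on the reader's in_fd with a zero timeout, which is how pasted multi-line input can be detected before the prompt blocks on the next read. A minimal standalone sketch of that idea (not taken from the patch; the helper name and the use of stdin are just examples):

#include <poll.h>
#include <unistd.h>
#include <iostream>

/// Hypothetical helper mirroring the poll()-based readiness check:
/// a zero timeout means "report readiness now, do not wait".
bool fdHasPendingInput(int fd)
{
    pollfd p{fd, POLLIN, 0};
    return poll(&p, 1, /*timeout_ms=*/ 0) == 1;
}

int main()
{
    std::cout << (fdHasPendingInput(STDIN_FILENO) ? "input pending\n" : "no input yet\n");
}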
@@ -56,6 +67,8 @@ public: virtual void enableBracketedPaste() {} virtual void disableBracketedPaste() {} + bool hasInputData() const; + protected: enum InputStatus { @@ -77,6 +90,10 @@ protected: virtual InputStatus readOneLine(const String & prompt); virtual void addToHistory(const String &) {} + + std::istream & input_stream; + std::ostream & output_stream; + int in_fd; }; } diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index c7494e31605..072184e0a66 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -16,7 +16,10 @@ #include #include #include - +#include +#include +#include +#include namespace DB { @@ -151,12 +154,26 @@ void LocalConnection::sendQuery( state->block = sample; String current_format = "Values"; + + const auto & settings = context->getSettingsRef(); const char * begin = state->query.data(); - auto parsed_query = ClientBase::parseQuery(begin, begin + state->query.size(), - context->getSettingsRef(), - /*allow_multi_statements=*/ false, - /*is_interactive=*/ false, - /*ignore_error=*/ false); + const char * end = begin + state->query.size(); + const Dialect & dialect = settings.dialect; + + std::unique_ptr parser; + if (dialect == Dialect::kusto) + parser = std::make_unique(end, settings.allow_settings_after_format_in_insert); + else if (dialect == Dialect::prql) + parser = std::make_unique(settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); + else + parser = std::make_unique(end, settings.allow_settings_after_format_in_insert); + + ASTPtr parsed_query; + if (dialect == Dialect::kusto) + parsed_query = parseKQLQueryAndMovePosition(*parser, begin, end, "", /*allow_multi_statements*/false, settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); + else + parsed_query = parseQueryAndMovePosition(*parser, begin, end, "", /*allow_multi_statements*/false, settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); + if (const auto * insert = parsed_query->as()) { if (!insert->format.empty()) @@ -341,22 +358,18 @@ bool LocalConnection::poll(size_t) if (!state->is_finished) { - if (send_progress && (state->after_send_progress.elapsedMicroseconds() >= query_context->getSettingsRef().interactive_delay)) - { - state->after_send_progress.restart(); - next_packet_type = Protocol::Server::Progress; + if (needSendProgressOrMetrics()) return true; - } - - if (send_profile_events && (state->after_send_profile_events.elapsedMicroseconds() >= query_context->getSettingsRef().interactive_delay)) - { - sendProfileEvents(); - return true; - } try { - pollImpl(); + while (pollImpl()) + { + LOG_DEBUG(&Poco::Logger::get("LocalConnection"), "Executor timeout encountered, will retry"); + + if (needSendProgressOrMetrics()) + return true; + } } catch (const Exception & e) { @@ -451,12 +464,34 @@ bool LocalConnection::poll(size_t) return false; } +bool LocalConnection::needSendProgressOrMetrics() +{ + if (send_progress && (state->after_send_progress.elapsedMicroseconds() >= query_context->getSettingsRef().interactive_delay)) + { + state->after_send_progress.restart(); + next_packet_type = Protocol::Server::Progress; + return true; + } + + if (send_profile_events && (state->after_send_profile_events.elapsedMicroseconds() >= query_context->getSettingsRef().interactive_delay)) + { + sendProfileEvents(); + return true; + } + + return false; +} + bool LocalConnection::pollImpl() { Block block; auto next_read = pullBlock(block); - if (block && 
!state->io.null_format) + if (!block && next_read) + { + return true; + } + else if (block && !state->io.null_format) { state->block.emplace(block); } @@ -465,7 +500,7 @@ bool LocalConnection::pollImpl() state->is_finished = true; } - return true; + return false; } Packet LocalConnection::receivePacket() diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 899d134cce5..fb6fa1b55eb 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -151,8 +151,11 @@ private: void sendProfileEvents(); + /// Returns true on executor timeout, meaning a retryable error. bool pollImpl(); + bool needSendProgressOrMetrics(); + ContextMutablePtr query_context; Session session; diff --git a/src/Client/ReplxxLineReader.cpp b/src/Client/ReplxxLineReader.cpp index 9e0f5946205..46600168695 100644 --- a/src/Client/ReplxxLineReader.cpp +++ b/src/Client/ReplxxLineReader.cpp @@ -297,8 +297,15 @@ ReplxxLineReader::ReplxxLineReader( Patterns extenders_, Patterns delimiters_, const char word_break_characters_[], - replxx::Replxx::highlighter_callback_t highlighter_) - : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_)), highlighter(std::move(highlighter_)) + replxx::Replxx::highlighter_callback_t highlighter_, + [[ maybe_unused ]] std::istream & input_stream_, + [[ maybe_unused ]] std::ostream & output_stream_, + [[ maybe_unused ]] int in_fd_, + [[ maybe_unused ]] int out_fd_, + [[ maybe_unused ]] int err_fd_ +) + : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_), input_stream_, output_stream_, in_fd_) + , highlighter(std::move(highlighter_)) , word_break_characters(word_break_characters_) , editor(getEditor()) { @@ -471,7 +478,7 @@ ReplxxLineReader::ReplxxLineReader( ReplxxLineReader::~ReplxxLineReader() { - if (close(history_file_fd)) + if (history_file_fd >= 0 && close(history_file_fd)) rx.print("Close of history file failed: %s\n", errnoToString().c_str()); } @@ -496,7 +503,7 @@ void ReplxxLineReader::addToHistory(const String & line) // but replxx::Replxx::history_load() does not // and that is why flock() is added here. 
bool locked = false; - if (flock(history_file_fd, LOCK_EX)) + if (history_file_fd >= 0 && flock(history_file_fd, LOCK_EX)) rx.print("Lock of history file failed: %s\n", errnoToString().c_str()); else locked = true; @@ -507,7 +514,7 @@ void ReplxxLineReader::addToHistory(const String & line) if (!rx.history_save(history_file_path)) rx.print("Saving history failed: %s\n", errnoToString().c_str()); - if (locked && 0 != flock(history_file_fd, LOCK_UN)) + if (history_file_fd >= 0 && locked && 0 != flock(history_file_fd, LOCK_UN)) rx.print("Unlock of history file failed: %s\n", errnoToString().c_str()); } diff --git a/src/Client/ReplxxLineReader.h b/src/Client/ReplxxLineReader.h index 6ad149e38f2..c46080420ef 100644 --- a/src/Client/ReplxxLineReader.h +++ b/src/Client/ReplxxLineReader.h @@ -1,6 +1,7 @@ #pragma once -#include "LineReader.h" +#include +#include #include namespace DB @@ -9,14 +10,22 @@ namespace DB class ReplxxLineReader : public LineReader { public: - ReplxxLineReader( + ReplxxLineReader + ( Suggest & suggest, const String & history_file_path, bool multiline, Patterns extenders_, Patterns delimiters_, const char word_break_characters_[], - replxx::Replxx::highlighter_callback_t highlighter_); + replxx::Replxx::highlighter_callback_t highlighter_, + std::istream & input_stream_ = std::cin, + std::ostream & output_stream_ = std::cout, + int in_fd_ = STDIN_FILENO, + int out_fd_ = STDOUT_FILENO, + int err_fd_ = STDERR_FILENO + ); + ~ReplxxLineReader() override; void enableBracketedPaste() override; diff --git a/src/Common/Config/ConfigReloader.cpp b/src/Common/Config/ConfigReloader.cpp index b2c07dacf07..769a63c036b 100644 --- a/src/Common/Config/ConfigReloader.cpp +++ b/src/Common/Config/ConfigReloader.cpp @@ -19,8 +19,7 @@ ConfigReloader::ConfigReloader( const std::string & preprocessed_dir_, zkutil::ZooKeeperNodeCache && zk_node_cache_, const zkutil::EventPtr & zk_changed_event_, - Updater && updater_, - bool already_loaded) + Updater && updater_) : config_path(config_path_) , extra_paths(extra_paths_) , preprocessed_dir(preprocessed_dir_) @@ -28,10 +27,15 @@ ConfigReloader::ConfigReloader( , zk_changed_event(zk_changed_event_) , updater(std::move(updater_)) { - if (!already_loaded) - reloadIfNewer(/* force = */ true, /* throw_on_error = */ true, /* fallback_to_preprocessed = */ true, /* initial_loading = */ true); -} + auto config = reloadIfNewer(/* force = */ true, /* throw_on_error = */ true, /* fallback_to_preprocessed = */ true, /* initial_loading = */ true); + if (config.has_value()) + reload_interval = std::chrono::milliseconds(config->configuration->getInt64("config_reload_interval_ms", DEFAULT_RELOAD_INTERVAL.count())); + else + reload_interval = DEFAULT_RELOAD_INTERVAL; + + LOG_TRACE(log, "Config reload interval set to {}ms", reload_interval.count()); +} void ConfigReloader::start() { @@ -82,7 +86,17 @@ void ConfigReloader::run() if (quit) return; - reloadIfNewer(zk_changed, /* throw_on_error = */ false, /* fallback_to_preprocessed = */ false, /* initial_loading = */ false); + auto config = reloadIfNewer(zk_changed, /* throw_on_error = */ false, /* fallback_to_preprocessed = */ false, /* initial_loading = */ false); + if (config.has_value()) + { + auto new_reload_interval = std::chrono::milliseconds(config->configuration->getInt64("config_reload_interval_ms", DEFAULT_RELOAD_INTERVAL.count())); + if (new_reload_interval != reload_interval) + { + reload_interval = new_reload_interval; + LOG_TRACE(log, "Config reload interval changed to {}ms", reload_interval.count()); + } + 
} + } catch (...) { @@ -92,7 +106,7 @@ void ConfigReloader::run() } } -void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallback_to_preprocessed, bool initial_loading) +std::optional ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallback_to_preprocessed, bool initial_loading) { std::lock_guard lock(reload_mutex); @@ -120,7 +134,7 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac throw; tryLogCurrentException(log, "ZooKeeper error when loading config from '" + config_path + "'"); - return; + return std::nullopt; } catch (...) { @@ -128,7 +142,7 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac throw; tryLogCurrentException(log, "Error loading config from '" + config_path + "'"); - return; + return std::nullopt; } config_processor.savePreprocessedConfig(loaded_config, preprocessed_dir); @@ -154,11 +168,13 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac if (throw_on_error) throw; tryLogCurrentException(log, "Error updating configuration from '" + config_path + "' config."); - return; + return std::nullopt; } LOG_DEBUG(log, "Loaded config '{}', performed update on configuration", config_path); + return loaded_config; } + return std::nullopt; } struct ConfigReloader::FileWithTimestamp diff --git a/src/Common/Config/ConfigReloader.h b/src/Common/Config/ConfigReloader.h index 13a797bad08..89ef0fd8a0b 100644 --- a/src/Common/Config/ConfigReloader.h +++ b/src/Common/Config/ConfigReloader.h @@ -17,8 +17,6 @@ namespace Poco { class Logger; } namespace DB { -class Context; - /** Every two seconds checks configuration files for update. * If configuration is changed, then config will be reloaded by ConfigProcessor * and the reloaded config will be applied via Updater functor. @@ -27,6 +25,8 @@ class Context; class ConfigReloader { public: + static constexpr auto DEFAULT_RELOAD_INTERVAL = std::chrono::milliseconds(2000); + using Updater = std::function; ConfigReloader( @@ -35,8 +35,7 @@ public: const std::string & preprocessed_dir, zkutil::ZooKeeperNodeCache && zk_node_cache, const zkutil::EventPtr & zk_changed_event, - Updater && updater, - bool already_loaded); + Updater && updater); ~ConfigReloader(); @@ -53,7 +52,7 @@ public: private: void run(); - void reloadIfNewer(bool force, bool throw_on_error, bool fallback_to_preprocessed, bool initial_loading); + std::optional reloadIfNewer(bool force, bool throw_on_error, bool fallback_to_preprocessed, bool initial_loading); struct FileWithTimestamp; @@ -67,8 +66,6 @@ private: FilesChangesTracker getNewFileList() const; - static constexpr auto reload_interval = std::chrono::seconds(2); - LoggerPtr log = getLogger("ConfigReloader"); std::string config_path; @@ -85,6 +82,8 @@ private: std::atomic quit{false}; ThreadFromGlobalPool thread; + std::chrono::milliseconds reload_interval = DEFAULT_RELOAD_INTERVAL; + /// Locked inside reloadIfNewer. std::mutex reload_mutex; }; diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 731c72d65f2..8516a88c7af 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -1,6 +1,7 @@ #include +// clang-format off /// Available metrics. Add something here as you wish. /// If the metric is generic (i.e. 
not server specific) /// it should be also added to src/Coordination/KeeperConstant.cpp diff --git a/src/Common/Dwarf.cpp b/src/Common/Dwarf.cpp index 99da3b75429..8439c01b22c 100644 --- a/src/Common/Dwarf.cpp +++ b/src/Common/Dwarf.cpp @@ -202,7 +202,10 @@ uint64_t readU64(std::string_view & sp) { SAFE_CHECK(sp.size() >= N, "underflow"); uint64_t x = 0; - memcpy(&x, sp.data(), N); + if constexpr (std::endian::native == std::endian::little) + memcpy(&x, sp.data(), N); + else + memcpy(reinterpret_cast(&x) + sizeof(uint64_t) - N, sp.data(), N); sp.remove_prefix(N); return x; } diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 29993ed33e4..b1b8e2367a4 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -602,6 +602,8 @@ M(721, DEPRECATED_FUNCTION) \ M(722, ASYNC_LOAD_WAIT_FAILED) \ M(723, PARQUET_EXCEPTION) \ + M(724, TOO_MANY_TABLES) \ + M(725, TOO_MANY_DATABASES) \ \ M(900, DISTRIBUTED_CACHE_ERROR) \ M(901, CANNOT_USE_DISTRIBUTED_CACHE) \ diff --git a/src/Common/ErrorCodes.h b/src/Common/ErrorCodes.h index 8879779a5e2..11a163becbe 100644 --- a/src/Common/ErrorCodes.h +++ b/src/Common/ErrorCodes.h @@ -1,8 +1,6 @@ #pragma once #include -#include -#include #include #include #include @@ -35,7 +33,7 @@ namespace ErrorCodes struct Error { - /// Number of times Exception with this ErrorCode had been throw. + /// Number of times Exception with this ErrorCode has been thrown. Value count = 0; /// Time of the last error. UInt64 error_time_ms = 0; diff --git a/src/Common/GetPriorityForLoadBalancing.cpp b/src/Common/GetPriorityForLoadBalancing.cpp index d4c6f89ff92..dc5704ef6b5 100644 --- a/src/Common/GetPriorityForLoadBalancing.cpp +++ b/src/Common/GetPriorityForLoadBalancing.cpp @@ -60,4 +60,26 @@ GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t return get_priority; } +/// Some load balancing strategies (such as "nearest hostname") have preferred nodes to connect to. +/// Usually it's a node in the same data center/availability zone. +/// For other strategies there's no difference between nodes. +bool GetPriorityForLoadBalancing::hasOptimalNode() const +{ + switch (load_balancing) + { + case LoadBalancing::NEAREST_HOSTNAME: + return true; + case LoadBalancing::HOSTNAME_LEVENSHTEIN_DISTANCE: + return true; + case LoadBalancing::IN_ORDER: + return false; + case LoadBalancing::RANDOM: + return false; + case LoadBalancing::FIRST_OR_RANDOM: + return true; + case LoadBalancing::ROUND_ROBIN: + return false; + } +} + } diff --git a/src/Common/GetPriorityForLoadBalancing.h b/src/Common/GetPriorityForLoadBalancing.h index 0de99730977..01dae9a1289 100644 --- a/src/Common/GetPriorityForLoadBalancing.h +++ b/src/Common/GetPriorityForLoadBalancing.h @@ -30,6 +30,8 @@ public: Func getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const; + bool hasOptimalNode() const; + std::vector hostname_prefix_distance; /// Prefix distances from name of this host to the names of hosts of pools. std::vector hostname_levenshtein_distance; /// Levenshtein Distances from name of this host to the names of hosts of pools. 
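The Dwarf.cpp change above widens an N-byte value into a uint64_t without reading past the buffer: on little-endian hosts the N data bytes land at the start of the integer, on big-endian hosts at the end, so the low-order bytes end up in the right place either way (assuming the encoded data matches host endianness). A minimal standalone sketch of that placement rule, with made-up names and not taken from the patch:

#include <bit>
#include <cstdint>
#include <cstring>
#include <cassert>

/// Hypothetical helper: widen an N-byte native-endian integer to uint64_t.
template <size_t N>
uint64_t readPartialNativeEndian(const unsigned char * data)
{
    static_assert(N >= 1 && N <= sizeof(uint64_t));
    uint64_t x = 0;
    if constexpr (std::endian::native == std::endian::little)
        memcpy(&x, data, N);  /// low-order bytes sit at the start of x
    else
        memcpy(reinterpret_cast<unsigned char *>(&x) + sizeof(uint64_t) - N, data, N);  /// ...or at the end
    return x;
}

int main()
{
    uint64_t value = 0xAABBCC;  /// fits in 3 bytes
    unsigned char full[8];
    memcpy(full, &value, sizeof(full));  /// native layout of the full 8-byte value
    /// The 3 significant bytes are at the front on little-endian hosts, at the back on big-endian hosts.
    const unsigned char * slice = (std::endian::native == std::endian::little) ? full : full + sizeof(full) - 3;
    assert(readPartialNativeEndian<3>(slice) == value);
}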
diff --git a/src/Common/HilbertUtils.h b/src/Common/HilbertUtils.h new file mode 100644 index 00000000000..f0f8360de90 --- /dev/null +++ b/src/Common/HilbertUtils.h @@ -0,0 +1,161 @@ +#pragma once + +#include +#include +#include "base/types.h" +#include +#include +#include +#include + + +namespace HilbertDetails +{ + + struct Segment // represents [begin; end], all bounds are included + { + UInt64 begin; + UInt64 end; + }; + +} + +/* + Given the range of values of hilbert code - and this function will return segments of the Hilbert curve + such that each of them lies in a whole domain (aka square) + 0 1 + ┌────────────────────────────────┐ + │ │ │ + │ │ │ + 0 │ 00xxx │ 11xxx │ + │ | │ | │ + │ | │ | │ + │_______________│________________│ + │ | │ | │ + │ | │ | │ + │ | │ | │ + 1 │ 01xxx______│_____10xxx │ + │ │ │ + │ │ │ + └────────────────────────────────┘ + Imagine a square, one side of which is a x-axis, other is a y-axis. + First approximation of the Hilbert curve is on the picture - U curve. + So we divide Hilbert Code Interval on 4 parts each of which is represented by a square + and look where the given interval [start, finish] is located: + [00xxxxxx | 01xxxxxx | 10xxxxxx | 11xxxxxx ] + 1: [ ] + start = 0010111 end = 10111110 + 2: [ ] [ ] + If it contains a whole sector (that represents a domain=square), + then we take this range. In the example above - it is a sector [01000000, 01111111] + Then we dig into the recursion and check the remaining ranges. + Note that after the first call all other ranges in the recursion will have either start or finish on the end of a range, + so the complexity of the algorithm will be O(logN), where N is the maximum of hilbert code. +*/ +template +void segmentBinaryPartition(UInt64 start, UInt64 finish, UInt8 current_bits, F && callback) +{ + if (current_bits == 0) + return; + + const auto next_bits = current_bits - 2; + const auto history = current_bits == 64 ? 0 : (start >> current_bits) << current_bits; + + const auto chunk_mask = 0b11; + const auto start_chunk = (start >> next_bits) & chunk_mask; + const auto finish_chunk = (finish >> next_bits) & chunk_mask; + + auto construct_range = [next_bits, history](UInt64 chunk) + { + return HilbertDetails::Segment{ + .begin = history + (chunk << next_bits), + .end = history + ((chunk + 1) << next_bits) - 1 + }; + }; + + if (start_chunk == finish_chunk) + { + if ((finish - start + 1) == (1 << next_bits)) // it means that [begin, end] is a range + { + callback(HilbertDetails::Segment{.begin = start, .end = finish}); + return; + } + segmentBinaryPartition(start, finish, next_bits, callback); + return; + } + + for (auto range_chunk = start_chunk + 1; range_chunk < finish_chunk; ++range_chunk) + { + callback(construct_range(range_chunk)); + } + + const auto start_range = construct_range(start_chunk); + if (start == start_range.begin) + { + callback(start_range); + } + else + { + segmentBinaryPartition(start, start_range.end, next_bits, callback); + } + + const auto finish_range = construct_range(finish_chunk); + if (finish == finish_range.end) + { + callback(finish_range); + } + else + { + segmentBinaryPartition(finish_range.begin, finish, next_bits, callback); + } +} + +// Given 2 points representing ends of the range of Hilbert Curve that lies in a whole domain. +// The are neighbour corners of some square - and the function returns ranges of both sides of this square +inline std::array, 2> createRangeFromCorners(UInt64 x1, UInt64 y1, UInt64 x2, UInt64 y2) +{ + UInt64 dist_x = x1 > x2 ? 
x1 - x2 : x2 - x1; + UInt64 dist_y = y1 > y2 ? y1 - y2 : y2 - y1; + UInt64 range_size = std::max(dist_x, dist_y); + bool contains_minimum_vertice = x1 % (range_size + 1) == 0; + if (contains_minimum_vertice) + { + UInt64 x_min = std::min(x1, x2); + UInt64 y_min = std::min(y1, y2); + return { + std::pair{x_min, x_min + range_size}, + std::pair{y_min, y_min + range_size} + }; + } + else + { + UInt64 x_max = std::max(x1, x2); + UInt64 y_max = std::max(y1, y2); + chassert(x_max >= range_size); + chassert(y_max >= range_size); + return { + std::pair{x_max - range_size, x_max}, + std::pair{y_max - range_size, y_max} + }; + } +} + +/** Unpack an interval of Hilbert curve to hyperrectangles covered by it across N dimensions. + */ +template +void hilbertIntervalToHyperrectangles2D(UInt64 first, UInt64 last, F && callback) +{ + const auto equal_bits_count = getLeadingZeroBits(last | first); + const auto even_equal_bits_count = equal_bits_count - equal_bits_count % 2; + segmentBinaryPartition(first, last, 64 - even_equal_bits_count, [&](HilbertDetails::Segment range) + { + auto interval1 = DB::FunctionHilbertDecode2DWIthLookupTableImpl<3>::decode(range.begin); + auto interval2 = DB::FunctionHilbertDecode2DWIthLookupTableImpl<3>::decode(range.end); + + std::array, 2> unpacked_range = createRangeFromCorners( + std::get<0>(interval1), std::get<1>(interval1), + std::get<0>(interval2), std::get<1>(interval2)); + + callback(unpacked_range); + }); +} diff --git a/src/Common/ICachePolicy.h b/src/Common/ICachePolicy.h index 8aa75d1d81f..301a5c6cbbd 100644 --- a/src/Common/ICachePolicy.h +++ b/src/Common/ICachePolicy.h @@ -48,7 +48,7 @@ public: /// HashFunction usually hashes the entire key and the found key will be equal the provided key. In such cases, use get(). It is also /// possible to store other, non-hashed data in the key. In that case, the found key is potentially different from the provided key. - /// Then use getWithKey() to also return the found key including it's non-hashed data. + /// Then use getWithKey() to also return the found key including its non-hashed data. virtual MappedPtr get(const Key & key) = 0; virtual std::optional getWithKey(const Key &) = 0; diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index fef1c4a2b75..d98373b6c55 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -3,6 +3,7 @@ #include +// clang-format off /// Available events. Add something here as you wish. /// If the event is generic (i.e. 
not server specific) /// it should be also added to src/Coordination/KeeperConstant.cpp @@ -14,6 +15,7 @@ M(QueriesWithSubqueries, "Count queries with all subqueries") \ M(SelectQueriesWithSubqueries, "Count SELECT queries with all subqueries") \ M(InsertQueriesWithSubqueries, "Count INSERT queries with all subqueries") \ + M(SelectQueriesWithPrimaryKeyUsage, "Count SELECT queries which use the primary key to evaluate the WHERE condition") \ M(AsyncInsertQuery, "Same as InsertQuery, but only for asynchronous INSERT queries.") \ M(AsyncInsertBytes, "Data size in bytes of asynchronous INSERT queries.") \ M(AsyncInsertRows, "Number of rows inserted by asynchronous INSERT queries.") \ @@ -635,11 +637,11 @@ The server successfully detected this situation and will download merged part fr M(S3QueueSetFileProcessingMicroseconds, "Time spent to set file as processing")\ M(S3QueueSetFileProcessedMicroseconds, "Time spent to set file as processed")\ M(S3QueueSetFileFailedMicroseconds, "Time spent to set file as failed")\ - M(S3QueueFailedFiles, "Number of files which failed to be processed")\ - M(S3QueueProcessedFiles, "Number of files which were processed")\ - M(S3QueueCleanupMaxSetSizeOrTTLMicroseconds, "Time spent to set file as failed")\ - M(S3QueuePullMicroseconds, "Time spent to read file data")\ - M(S3QueueLockLocalFileStatusesMicroseconds, "Time spent to lock local file statuses")\ + M(ObjectStorageQueueFailedFiles, "Number of files which failed to be processed")\ + M(ObjectStorageQueueProcessedFiles, "Number of files which were processed")\ + M(ObjectStorageQueueCleanupMaxSetSizeOrTTLMicroseconds, "Time spent to set file as failed")\ + M(ObjectStorageQueuePullMicroseconds, "Time spent to read file data")\ + M(ObjectStorageQueueLockLocalFileStatusesMicroseconds, "Time spent to lock local file statuses")\ \ M(ServerStartupMilliseconds, "Time elapsed from starting server to listening to sockets in milliseconds")\ M(IOUringSQEsSubmitted, "Total number of io_uring SQEs submitted") \ diff --git a/src/Common/ProgressIndication.cpp b/src/Common/ProgressIndication.cpp index 7b07c72824a..0b482cb09be 100644 --- a/src/Common/ProgressIndication.cpp +++ b/src/Common/ProgressIndication.cpp @@ -92,19 +92,19 @@ void ProgressIndication::writeFinalProgress() if (progress.read_rows < 1000) return; - std::cout << "Processed " << formatReadableQuantity(progress.read_rows) << " rows, " + output_stream << "Processed " << formatReadableQuantity(progress.read_rows) << " rows, " << formatReadableSizeWithDecimalSuffix(progress.read_bytes); UInt64 elapsed_ns = getElapsedNanoseconds(); if (elapsed_ns) - std::cout << " (" << formatReadableQuantity(progress.read_rows * 1000000000.0 / elapsed_ns) << " rows/s., " + output_stream << " (" << formatReadableQuantity(progress.read_rows * 1000000000.0 / elapsed_ns) << " rows/s., " << formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.)"; else - std::cout << ". "; + output_stream << ". 
"; auto peak_memory_usage = getMemoryUsage().peak; if (peak_memory_usage >= 0) - std::cout << "\nPeak memory usage: " << formatReadableSizeWithBinarySuffix(peak_memory_usage) << "."; + output_stream << "\nPeak memory usage: " << formatReadableSizeWithBinarySuffix(peak_memory_usage) << "."; } void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message) @@ -125,7 +125,7 @@ void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message) const char * indicator = indicators[increment % 8]; - size_t terminal_width = getTerminalWidth(); + size_t terminal_width = getTerminalWidth(in_fd, err_fd); if (!written_progress_chars) { diff --git a/src/Common/ProgressIndication.h b/src/Common/ProgressIndication.h index a9965785889..ae39fb49bcc 100644 --- a/src/Common/ProgressIndication.h +++ b/src/Common/ProgressIndication.h @@ -32,6 +32,19 @@ using HostToTimesMap = std::unordered_map; class ProgressIndication { public: + + explicit ProgressIndication + ( + std::ostream & output_stream_ = std::cout, + int in_fd_ = STDIN_FILENO, + int err_fd_ = STDERR_FILENO + ) + : output_stream(output_stream_), + in_fd(in_fd_), + err_fd(err_fd_) + { + } + /// Write progress bar. void writeProgress(WriteBufferFromFileDescriptor & message); void clearProgressOutput(WriteBufferFromFileDescriptor & message); @@ -103,6 +116,10 @@ private: /// - hosts_data/cpu_usage_meter (guarded with profile_events_mutex) mutable std::mutex profile_events_mutex; mutable std::mutex progress_mutex; + + std::ostream & output_stream; + int in_fd; + int err_fd; }; } diff --git a/src/Common/Scheduler/ISchedulerNode.h b/src/Common/Scheduler/ISchedulerNode.h index df8d86f379c..81b491b0eda 100644 --- a/src/Common/Scheduler/ISchedulerNode.h +++ b/src/Common/Scheduler/ISchedulerNode.h @@ -11,10 +11,10 @@ #include #include +#include #include #include -#include #include #include #include @@ -30,6 +30,8 @@ namespace ErrorCodes } class ISchedulerNode; +class EventQueue; +using EventId = UInt64; inline const Poco::Util::AbstractConfiguration & emptyConfig() { @@ -82,6 +84,115 @@ struct SchedulerNodeInfo } }; + +/* + * Node of hierarchy for scheduling requests for resource. Base class for all + * kinds of scheduling elements (queues, policies, constraints and schedulers). + * + * Root node is a scheduler, which has it's thread to dequeue requests, + * execute requests (see ResourceRequest) and process events in a thread-safe manner. + * Immediate children of the scheduler represent independent resources. + * Each resource has it's own hierarchy to achieve required scheduling policies. + * Non-leaf nodes do not hold requests, but keep scheduling state + * (e.g. consumption history, amount of in-flight requests, etc). + * Leafs of hierarchy are queues capable of holding pending requests. + * + * scheduler (SchedulerRoot) + * / \ + * constraint constraint (SemaphoreConstraint) + * | | + * policy policy (PriorityPolicy) + * / \ / \ + * q1 q2 q3 q4 (FifoQueue) + * + * Dequeueing request from an inner node will dequeue request from one of active leaf-queues in its subtree. + * Node is considered to be active iff: + * - it has at least one pending request in one of leaves of it's subtree; + * - and enforced constraints, if any, are satisfied + * (e.g. amount of concurrent requests is not greater than some number). + * + * All methods must be called only from scheduler thread for thread-safety. 
+ */ +class ISchedulerNode : public boost::intrusive::list_base_hook<>, private boost::noncopyable +{ +public: + explicit ISchedulerNode(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {}) + : event_queue(event_queue_) + , info(config, config_prefix) + {} + + virtual ~ISchedulerNode() = default; + + /// Checks if two nodes configuration is equal + virtual bool equals(ISchedulerNode * other) + { + return info.equals(other->info); + } + + /// Attach new child + virtual void attachChild(const std::shared_ptr & child) = 0; + + /// Detach and destroy child + virtual void removeChild(ISchedulerNode * child) = 0; + + /// Get attached child by name + virtual ISchedulerNode * getChild(const String & child_name) = 0; + + /// Activation of child due to the first pending request + /// Should be called on leaf node (i.e. queue) to propagate activation signal through chain to the root + virtual void activateChild(ISchedulerNode * child) = 0; + + /// Returns true iff node is active + virtual bool isActive() = 0; + + /// Returns number of active children + virtual size_t activeChildren() = 0; + + /// Returns the first request to be executed as the first component of resulting pair. + /// The second pair component is `true` iff node is still active after dequeueing. + virtual std::pair dequeueRequest() = 0; + + /// Returns full path string using names of every parent + String getPath() + { + String result; + ISchedulerNode * ptr = this; + while (ptr->parent) + { + result = "/" + ptr->basename + result; + ptr = ptr->parent; + } + return result.empty() ? "/" : result; + } + + /// Attach to a parent (used by attachChild) + virtual void setParent(ISchedulerNode * parent_) + { + parent = parent_; + } + +protected: + /// Notify parents about the first pending request or constraint becoming satisfied. + /// Postponed to be handled in scheduler thread, so it is intended to be called from outside. + void scheduleActivation(); + +public: + EventQueue * const event_queue; + String basename; + SchedulerNodeInfo info; + ISchedulerNode * parent = nullptr; + EventId activation_event_id = 0; // Valid for `ISchedulerNode` placed in EventQueue::activations + + /// Introspection + std::atomic dequeued_requests{0}; + std::atomic canceled_requests{0}; + std::atomic dequeued_cost{0}; + std::atomic canceled_cost{0}; + std::atomic busy_periods{0}; +}; + +using SchedulerNodePtr = std::shared_ptr; + /* * Simple waitable thread-safe FIFO task queue. * Intended to hold postponed events for later handling (usually by scheduler thread). 
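The activation_event_id member added above, together with the EventQueue rework in the hunks that follow, keeps activations in a separate, allocation-free queue while still processing them in the same global order as ordinary events: every enqueued item takes the next value of a shared counter, and the consumer always pops whichever queue has the smaller id at its head. A toy standalone sketch of that ordering idea (not the real EventQueue; all names here are illustrative):

#include <cstdint>
#include <deque>
#include <iostream>
#include <string>

using EventId = uint64_t;
struct Item { EventId id; std::string label; };

int main()
{
    EventId last_id = 0;
    std::deque<Item> events;       /// ordinary tasks
    std::deque<Item> activations;  /// node activations, kept separately for performance

    auto enqueue_event = [&](const std::string & s) { events.push_back({++last_id, s}); };
    auto enqueue_activation = [&](const std::string & s) { activations.push_back({++last_id, s}); };

    enqueue_event("E1");
    enqueue_activation("A1");
    enqueue_event("E2");
    enqueue_activation("A2");

    /// Always pop the queue whose head has the smaller id; this restores the global enqueue order.
    while (!events.empty() || !activations.empty())
    {
        bool take_activation = !activations.empty()
            && (events.empty() || activations.front().id < events.front().id);
        auto & q = take_activation ? activations : events;
        std::cout << q.front().label << ' ';
        q.pop_front();
    }
    std::cout << '\n';  /// prints: E1 A1 E2 A2
}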
@@ -89,57 +200,70 @@ struct SchedulerNodeInfo class EventQueue { public: - using Event = std::function; + using Task = std::function; + + static constexpr EventId not_postponed = 0; + using TimePoint = std::chrono::system_clock::time_point; using Duration = std::chrono::system_clock::duration; - static constexpr UInt64 not_postponed = 0; + + struct Event + { + const EventId event_id; + Task task; + + Event(EventId event_id_, Task && task_) + : event_id(event_id_) + , task(std::move(task_)) + {} + }; struct Postponed { TimePoint key; - UInt64 id; // for canceling - std::unique_ptr event; + EventId event_id; // for canceling + std::unique_ptr task; - Postponed(TimePoint key_, UInt64 id_, Event && event_) + Postponed(TimePoint key_, EventId event_id_, Task && task_) : key(key_) - , id(id_) - , event(std::make_unique(std::move(event_))) + , event_id(event_id_) + , task(std::make_unique(std::move(task_))) {} bool operator<(const Postponed & rhs) const { - return std::tie(key, id) > std::tie(rhs.key, rhs.id); // reversed for min-heap + return std::tie(key, event_id) > std::tie(rhs.key, rhs.event_id); // reversed for min-heap } }; /// Add an `event` to be processed after `until` time point. - /// Returns a unique id for canceling. - [[nodiscard]] UInt64 postpone(TimePoint until, Event && event) + /// Returns a unique event id for canceling. + [[nodiscard]] EventId postpone(TimePoint until, Task && task) { std::unique_lock lock{mutex}; if (postponed.empty() || until < postponed.front().key) pending.notify_one(); - auto id = ++last_id; - postponed.emplace_back(until, id, std::move(event)); + auto event_id = ++last_event_id; + postponed.emplace_back(until, event_id, std::move(task)); std::push_heap(postponed.begin(), postponed.end()); - return id; + return event_id; } /// Cancel a postponed event using its unique id. /// NOTE: Only postponed events can be canceled. /// NOTE: If you need to cancel enqueued event, consider doing your actions inside another enqueued /// NOTE: event instead. This ensures that all previous events are processed. - bool cancelPostponed(UInt64 postponed_id) + bool cancelPostponed(EventId postponed_event_id) { - if (postponed_id == not_postponed) + if (postponed_event_id == not_postponed) return false; std::unique_lock lock{mutex}; for (auto i = postponed.begin(), e = postponed.end(); i != e; ++i) { - if (i->id == postponed_id) + if (i->event_id == postponed_event_id) { postponed.erase(i); - // It is O(n), but we do not expect either big heaps or frequent cancels. So it is fine. + // It is O(n), but we do not expect neither big heaps nor frequent cancels. So it is fine. std::make_heap(postponed.begin(), postponed.end()); return true; } @@ -148,11 +272,23 @@ public: } /// Add an `event` for immediate processing - void enqueue(Event && event) + void enqueue(Task && task) { std::unique_lock lock{mutex}; - bool was_empty = queue.empty(); - queue.emplace_back(event); + bool was_empty = events.empty() && activations.empty(); + auto event_id = ++last_event_id; + events.emplace_back(event_id, std::move(task)); + if (was_empty) + pending.notify_one(); + } + + /// Add an activation `event` for immediate processing. Activations use a separate queue for performance reasons. 
+ void enqueueActivation(ISchedulerNode * node) + { + std::unique_lock lock{mutex}; + bool was_empty = events.empty() && activations.empty(); + node->activation_event_id = ++last_event_id; + activations.push_back(*node); if (was_empty) pending.notify_one(); } @@ -163,7 +299,7 @@ public: bool forceProcess() { std::unique_lock lock{mutex}; - if (!queue.empty()) + if (!events.empty() || !activations.empty()) { processQueue(std::move(lock)); return true; @@ -181,7 +317,7 @@ public: bool tryProcess() { std::unique_lock lock{mutex}; - if (!queue.empty()) + if (!events.empty() || !activations.empty()) { processQueue(std::move(lock)); return true; @@ -205,7 +341,7 @@ public: std::unique_lock lock{mutex}; while (true) { - if (!queue.empty()) + if (!events.empty() || !activations.empty()) { processQueue(std::move(lock)); return; @@ -269,141 +405,69 @@ private: void processQueue(std::unique_lock && lock) { - Event event = std::move(queue.front()); - queue.pop_front(); + if (events.empty()) + { + processActivation(std::move(lock)); + return; + } + if (activations.empty()) + { + processEvent(std::move(lock)); + return; + } + if (activations.front().activation_event_id < events.front().event_id) + processActivation(std::move(lock)); + else + processEvent(std::move(lock)); + } + + void processActivation(std::unique_lock && lock) + { + ISchedulerNode * node = &activations.front(); + activations.pop_front(); + node->activation_event_id = 0; lock.unlock(); // do not hold queue mutex while processing events - event(); + node->parent->activateChild(node); + } + + void processEvent(std::unique_lock && lock) + { + Task task = std::move(events.front().task); + events.pop_front(); + lock.unlock(); // do not hold queue mutex while processing events + task(); } void processPostponed(std::unique_lock && lock) { - Event event = std::move(*postponed.front().event); + Task task = std::move(*postponed.front().task); std::pop_heap(postponed.begin(), postponed.end()); postponed.pop_back(); lock.unlock(); // do not hold queue mutex while processing events - event(); + task(); } std::mutex mutex; std::condition_variable pending; - std::deque queue; + + // `events` and `activations` logically represent one ordered queue. To preserve the common order we use `EventId` + // Activations are stored in a separate queue for performance reasons (mostly to avoid any allocations) + std::deque events; + boost::intrusive::list activations; + std::vector postponed; - UInt64 last_id = 0; + EventId last_event_id = 0; std::atomic manual_time{TimePoint()}; // for tests only }; -/* - * Node of hierarchy for scheduling requests for resource. Base class for all - * kinds of scheduling elements (queues, policies, constraints and schedulers). - * - * Root node is a scheduler, which has it's thread to dequeue requests, - * execute requests (see ResourceRequest) and process events in a thread-safe manner. - * Immediate children of the scheduler represent independent resources. - * Each resource has it's own hierarchy to achieve required scheduling policies. - * Non-leaf nodes do not hold requests, but keep scheduling state - * (e.g. consumption history, amount of in-flight requests, etc). - * Leafs of hierarchy are queues capable of holding pending requests. - * - * scheduler (SchedulerRoot) - * / \ - * constraint constraint (SemaphoreConstraint) - * | | - * policy policy (PriorityPolicy) - * / \ / \ - * q1 q2 q3 q4 (FifoQueue) - * - * Dequeueing request from an inner node will dequeue request from one of active leaf-queues in its subtree. 
- * Node is considered to be active iff: - * - it has at least one pending request in one of leaves of it's subtree; - * - and enforced constraints, if any, are satisfied - * (e.g. amount of concurrent requests is not greater than some number). - * - * All methods must be called only from scheduler thread for thread-safety. - */ -class ISchedulerNode : private boost::noncopyable +inline void ISchedulerNode::scheduleActivation() { -public: - explicit ISchedulerNode(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {}) - : event_queue(event_queue_) - , info(config, config_prefix) - {} - - virtual ~ISchedulerNode() = default; - - /// Checks if two nodes configuration is equal - virtual bool equals(ISchedulerNode * other) + if (likely(parent)) { - return info.equals(other->info); + // The same as `enqueue([this] { parent->activateChild(this); });` but faster + event_queue->enqueueActivation(this); } - - /// Attach new child - virtual void attachChild(const std::shared_ptr & child) = 0; - - /// Detach and destroy child - virtual void removeChild(ISchedulerNode * child) = 0; - - /// Get attached child by name - virtual ISchedulerNode * getChild(const String & child_name) = 0; - - /// Activation of child due to the first pending request - /// Should be called on leaf node (i.e. queue) to propagate activation signal through chain to the root - virtual void activateChild(ISchedulerNode * child) = 0; - - /// Returns true iff node is active - virtual bool isActive() = 0; - - /// Returns number of active children - virtual size_t activeChildren() = 0; - - /// Returns the first request to be executed as the first component of resulting pair. - /// The second pair component is `true` iff node is still active after dequeueing. - virtual std::pair dequeueRequest() = 0; - - /// Returns full path string using names of every parent - String getPath() - { - String result; - ISchedulerNode * ptr = this; - while (ptr->parent) - { - result = "/" + ptr->basename + result; - ptr = ptr->parent; - } - return result.empty() ? "/" : result; - } - - /// Attach to a parent (used by attachChild) - virtual void setParent(ISchedulerNode * parent_) - { - parent = parent_; - } - -protected: - /// Notify parents about the first pending request or constraint becoming satisfied. - /// Postponed to be handled in scheduler thread, so it is intended to be called from outside. 
- void scheduleActivation() - { - if (likely(parent)) - { - event_queue->enqueue([this] { parent->activateChild(this); }); - } - } - -public: - EventQueue * const event_queue; - String basename; - SchedulerNodeInfo info; - ISchedulerNode * parent = nullptr; - - /// Introspection - std::atomic dequeued_requests{0}; - std::atomic canceled_requests{0}; - std::atomic dequeued_cost{0}; - std::atomic canceled_cost{0}; - std::atomic busy_periods{0}; -}; - -using SchedulerNodePtr = std::shared_ptr; +} } diff --git a/src/Common/Scheduler/Nodes/tests/gtest_event_queue.cpp b/src/Common/Scheduler/Nodes/tests/gtest_event_queue.cpp new file mode 100644 index 00000000000..07798f78080 --- /dev/null +++ b/src/Common/Scheduler/Nodes/tests/gtest_event_queue.cpp @@ -0,0 +1,143 @@ +#include +#include + +#include + +using namespace DB; + +class FakeSchedulerNode : public ISchedulerNode +{ +public: + explicit FakeSchedulerNode(String & log_, EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {}) + : ISchedulerNode(event_queue_, config, config_prefix) + , log(log_) + {} + + void attachChild(const SchedulerNodePtr & child) override + { + log += " +" + child->basename; + } + + void removeChild(ISchedulerNode * child) override + { + log += " -" + child->basename; + } + + ISchedulerNode * getChild(const String & /* child_name */) override + { + return nullptr; + } + + void activateChild(ISchedulerNode * child) override + { + log += " A" + child->basename; + } + + bool isActive() override + { + return false; + } + + size_t activeChildren() override + { + return 0; + } + + std::pair dequeueRequest() override + { + log += " D"; + return {nullptr, false}; + } + +private: + String & log; +}; + +struct QueueTest { + String log; + EventQueue event_queue; + FakeSchedulerNode root_node; + + QueueTest() + : root_node(log, &event_queue) + {} + + SchedulerNodePtr makeNode(const String & name) + { + auto node = std::make_shared(log, &event_queue); + node->basename = name; + node->setParent(&root_node); + return std::static_pointer_cast(node); + } + + void process(EventQueue::TimePoint now, const String & expected_log, size_t limit = size_t(-1)) + { + event_queue.setManualTime(now); + for (;limit > 0; limit--) + { + if (!event_queue.tryProcess()) + break; + } + EXPECT_EQ(log, expected_log); + log.clear(); + } + + void activate(const SchedulerNodePtr & node) + { + event_queue.enqueueActivation(node.get()); + } + + void event(const String & text) + { + event_queue.enqueue([this, text] { log += " " + text; }); + } + + EventId postpone(EventQueue::TimePoint until, const String & text) + { + return event_queue.postpone(until, [this, text] { log += " " + text; }); + } + + void cancel(EventId event_id) + { + event_queue.cancelPostponed(event_id); + } +}; + +TEST(SchedulerEventQueue, Smoke) +{ + QueueTest t; + + using namespace std::chrono_literals; + + EventQueue::TimePoint start = std::chrono::system_clock::now(); + t.process(start, "", 0); + + // Activations + auto node1 = t.makeNode("1"); + auto node2 = t.makeNode("2"); + t.activate(node2); + t.activate(node1); + t.process(start + 42s, " A2 A1"); + + // Events + t.event("E1"); + t.event("E2"); + t.process(start + 100s, " E1 E2"); + + // Postponed events + t.postpone(start + 200s, "P200"); + auto p190 = t.postpone(start + 200s, "P190"); + t.postpone(start + 150s, "P150"); + t.postpone(start + 175s, "P175"); + t.process(start + 180s, " P150 P175"); + t.event("E3"); + t.cancel(p190); + t.process(start + 300s, " E3 
P200"); + + // Ordering of events and activations + t.event("E1"); + t.activate(node1); + t.event("E2"); + t.activate(node2); + t.process(start + 300s, " E1 A1 E2 A2"); +} diff --git a/src/Common/Scheduler/Nodes/tests/gtest_throttler_constraint.cpp b/src/Common/Scheduler/Nodes/tests/gtest_throttler_constraint.cpp index 9703227ccfc..6cfccb252fa 100644 --- a/src/Common/Scheduler/Nodes/tests/gtest_throttler_constraint.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_throttler_constraint.cpp @@ -5,8 +5,6 @@ #include #include -#include "Common/Scheduler/ISchedulerNode.h" -#include "Common/Scheduler/ResourceRequest.h" using namespace DB; diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 15803db4929..a9307c3be99 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -10,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index 95906c63349..b87fcf419d3 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -1,9 +1,7 @@ #pragma once -#include #include #include -#include #include #include @@ -27,12 +25,13 @@ M(ZooKeeperLogElement) \ M(ProcessorProfileLogElement) \ M(TextLogElement) \ - M(S3QueueLogElement) \ + M(ObjectStorageQueueLogElement) \ M(FilesystemCacheLogElement) \ M(FilesystemReadPrefetchesLogElement) \ M(AsynchronousInsertLogElement) \ M(BackupLogElement) \ - M(BlobStorageLogElement) + M(BlobStorageLogElement) \ + M(ErrorLogElement) namespace Poco { diff --git a/src/Common/TerminalSize.cpp b/src/Common/TerminalSize.cpp index bc5b4474384..8139f4f7616 100644 --- a/src/Common/TerminalSize.cpp +++ b/src/Common/TerminalSize.cpp @@ -13,17 +13,17 @@ namespace DB::ErrorCodes extern const int SYSTEM_ERROR; } -uint16_t getTerminalWidth() +uint16_t getTerminalWidth(int in_fd, int err_fd) { struct winsize terminal_size {}; - if (isatty(STDIN_FILENO)) + if (isatty(in_fd)) { - if (ioctl(STDIN_FILENO, TIOCGWINSZ, &terminal_size)) + if (ioctl(in_fd, TIOCGWINSZ, &terminal_size)) throw DB::ErrnoException(DB::ErrorCodes::SYSTEM_ERROR, "Cannot obtain terminal window size (ioctl TIOCGWINSZ)"); } - else if (isatty(STDERR_FILENO)) + else if (isatty(err_fd)) { - if (ioctl(STDERR_FILENO, TIOCGWINSZ, &terminal_size)) + if (ioctl(err_fd, TIOCGWINSZ, &terminal_size)) throw DB::ErrnoException(DB::ErrorCodes::SYSTEM_ERROR, "Cannot obtain terminal window size (ioctl TIOCGWINSZ)"); } /// Default - 0. diff --git a/src/Common/TerminalSize.h b/src/Common/TerminalSize.h index b5fc6de7921..f1334f2bcb9 100644 --- a/src/Common/TerminalSize.h +++ b/src/Common/TerminalSize.h @@ -1,16 +1,16 @@ #pragma once #include +#include #include namespace po = boost::program_options; -uint16_t getTerminalWidth(); +uint16_t getTerminalWidth(int in_fd = STDIN_FILENO, int err_fd = STDERR_FILENO); /** Creates po::options_description with name and an appropriate size for option displaying * when program is called with option --help * */ po::options_description createOptionsDescription(const std::string &caption, unsigned short terminal_width); /// NOLINT - diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index 7d574247aa5..2c6cbc4a5d5 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -559,6 +559,8 @@ public: /// Useful to check owner of ephemeral node. 
virtual int64_t getSessionID() const = 0; + virtual String tryGetAvailabilityZone() { return ""; } + /// If the method will throw an exception, callbacks won't be called. /// /// After the method is executed successfully, you must wait for callbacks @@ -635,10 +637,6 @@ public: virtual const DB::KeeperFeatureFlags * getKeeperFeatureFlags() const { return nullptr; } - /// A ZooKeeper session can have an optional deadline set on it. - /// After it has been reached, the session needs to be finalized. - virtual bool hasReachedDeadline() const = 0; - /// Expire session and finish all pending requests virtual void finalize(const String & reason) = 0; }; diff --git a/src/Common/ZooKeeper/TestKeeper.h b/src/Common/ZooKeeper/TestKeeper.h index 2774055652c..2194ad015bf 100644 --- a/src/Common/ZooKeeper/TestKeeper.h +++ b/src/Common/ZooKeeper/TestKeeper.h @@ -39,7 +39,6 @@ public: ~TestKeeper() override; bool isExpired() const override { return expired; } - bool hasReachedDeadline() const override { return false; } Int8 getConnectedNodeIdx() const override { return 0; } String getConnectedHostPort() const override { return "TestKeeper:0000"; } int32_t getConnectionXid() const override { return 0; } diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 4ec44a39136..56db9adb787 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -16,10 +17,12 @@ #include #include #include +#include #include "Common/ZooKeeper/IKeeper.h" #include #include #include +#include #include #include @@ -55,70 +58,120 @@ static void check(Coordination::Error code, const std::string & path) throw KeeperException::fromPath(code, path); } +UInt64 getSecondsUntilReconnect(const ZooKeeperArgs & args) +{ + std::uniform_int_distribution fallback_session_lifetime_distribution + { + args.fallback_session_lifetime.min_sec, + args.fallback_session_lifetime.max_sec, + }; + UInt32 session_lifetime_seconds = fallback_session_lifetime_distribution(thread_local_rng); + return session_lifetime_seconds; +} -void ZooKeeper::init(ZooKeeperArgs args_) +void ZooKeeper::updateAvailabilityZones() +{ + ShuffleHosts shuffled_hosts = shuffleHosts(); + + for (const auto & node : shuffled_hosts) + { + try + { + ShuffleHosts single_node{node}; + auto tmp_impl = std::make_unique(single_node, args, zk_log); + auto idx = node.original_index; + availability_zones[idx] = tmp_impl->tryGetAvailabilityZone(); + LOG_TEST(log, "Got availability zone for {}: {}", args.hosts[idx], availability_zones[idx]); + } + catch (...) 
+ { + DB::tryLogCurrentException(log, "Failed to get availability zone for " + node.host); + } + } + LOG_DEBUG(log, "Updated availability zones: [{}]", fmt::join(availability_zones, ", ")); +} + +void ZooKeeper::init(ZooKeeperArgs args_, std::unique_ptr existing_impl) { args = std::move(args_); log = getLogger("ZooKeeper"); - if (args.implementation == "zookeeper") + if (existing_impl) + { + chassert(args.implementation == "zookeeper"); + impl = std::move(existing_impl); + LOG_INFO(log, "Switching to connection to a more optimal node {}", impl->getConnectedHostPort()); + } + else if (args.implementation == "zookeeper") { if (args.hosts.empty()) throw KeeperException::fromMessage(Coordination::Error::ZBADARGUMENTS, "No hosts passed to ZooKeeper constructor."); - Coordination::ZooKeeper::Nodes nodes; - nodes.reserve(args.hosts.size()); + chassert(args.availability_zones.size() == args.hosts.size()); + if (availability_zones.empty()) + { + /// availability_zones is empty on server startup or after config reloading + /// We will keep the az info when starting new sessions + availability_zones = args.availability_zones; + LOG_TEST(log, "Availability zones from config: [{}], client: {}", fmt::join(availability_zones, ", "), args.client_availability_zone); + if (args.availability_zone_autodetect) + updateAvailabilityZones(); + } + chassert(availability_zones.size() == args.hosts.size()); /// Shuffle the hosts to distribute the load among ZooKeeper nodes. - std::vector shuffled_hosts = shuffleHosts(); + ShuffleHosts shuffled_hosts = shuffleHosts(); - bool dns_error = false; - for (auto & host : shuffled_hosts) - { - auto & host_string = host.host; - try - { - const bool secure = startsWith(host_string, "secure://"); - - if (secure) - host_string.erase(0, strlen("secure://")); - - /// We want to resolve all hosts without DNS cache for keeper connection. 
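To make the reconnect pacing concrete, here is a standalone sketch of the same idea as getSecondsUntilReconnect() (plain std types instead of thread_local_rng and ZooKeeperArgs): with fallback_session_lifetime bounds of, say, 600 and 1800 seconds, each client connected to a suboptimal node draws a uniform point in that window, so sessions expire spread out over the window rather than all at once.

#include <cstdint>
#include <random>

// Illustrative only: the real code reads min/max from ZooKeeperArgs::fallback_session_lifetime.
uint64_t secondsUntilReconnect(uint32_t min_sec, uint32_t max_sec, std::mt19937_64 & rng)
{
    std::uniform_int_distribution<uint32_t> dist{min_sec, max_sec};
    return dist(rng); // e.g. min_sec=600, max_sec=1800 -> some value in [600, 1800]
}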
- Coordination::DNSResolver::instance().removeHostFromCache(host_string); - - const Poco::Net::SocketAddress host_socket_addr{host_string}; - LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, host_socket_addr.toString()); - nodes.emplace_back(Coordination::ZooKeeper::Node{host_socket_addr, host.original_index, secure}); - } - catch (const Poco::Net::HostNotFoundException & e) - { - /// Most likely it's misconfiguration and wrong hostname was specified - LOG_ERROR(log, "Cannot use ZooKeeper host {}, reason: {}", host_string, e.displayText()); - } - catch (const Poco::Net::DNSException & e) - { - /// Most likely DNS is not available now - dns_error = true; - LOG_ERROR(log, "Cannot use ZooKeeper host {} due to DNS error: {}", host_string, e.displayText()); - } - } - - if (nodes.empty()) - { - /// For DNS errors we throw exception with ZCONNECTIONLOSS code, so it will be considered as hardware error, not user error - if (dns_error) - throw KeeperException::fromMessage(Coordination::Error::ZCONNECTIONLOSS, "Cannot resolve any of provided ZooKeeper hosts due to DNS error"); - else - throw KeeperException::fromMessage(Coordination::Error::ZCONNECTIONLOSS, "Cannot use any of provided ZooKeeper nodes"); - } - - impl = std::make_unique(nodes, args, zk_log); + impl = std::make_unique(shuffled_hosts, args, zk_log); + Int8 node_idx = impl->getConnectedNodeIdx(); if (args.chroot.empty()) LOG_TRACE(log, "Initialized, hosts: {}", fmt::join(args.hosts, ",")); else LOG_TRACE(log, "Initialized, hosts: {}, chroot: {}", fmt::join(args.hosts, ","), args.chroot); + + + /// If the balancing strategy has an optimal node then it will be the first in the list + bool connected_to_suboptimal_node = node_idx != shuffled_hosts[0].original_index; + bool respect_az = args.prefer_local_availability_zone && !args.client_availability_zone.empty(); + bool may_benefit_from_reconnecting = respect_az || args.get_priority_load_balancing.hasOptimalNode(); + if (connected_to_suboptimal_node && may_benefit_from_reconnecting) + { + auto reconnect_timeout_sec = getSecondsUntilReconnect(args); + LOG_DEBUG(log, "Connected to a suboptimal ZooKeeper host ({}, index {})." + " To preserve balance in ZooKeeper usage, this ZooKeeper session will expire in {} seconds", + impl->getConnectedHostPort(), node_idx, reconnect_timeout_sec); + + auto reconnect_task_holder = DB::Context::getGlobalContextInstance()->getSchedulePool().createTask("ZKReconnect", [this, optimal_host = shuffled_hosts[0]]() + { + try + { + LOG_DEBUG(log, "Trying to connect to a more optimal node {}", optimal_host.host); + ShuffleHosts node{optimal_host}; + std::unique_ptr new_impl = std::make_unique(node, args, zk_log); + Int8 new_node_idx = new_impl->getConnectedNodeIdx(); + + /// Maybe the node was unavailable when getting AZs first time, update just in case + if (args.availability_zone_autodetect && availability_zones[new_node_idx].empty()) + { + availability_zones[new_node_idx] = new_impl->tryGetAvailabilityZone(); + LOG_DEBUG(log, "Got availability zone for {}: {}", optimal_host.host, availability_zones[new_node_idx]); + } + + optimal_impl = std::move(new_impl); + impl->finalize("Connected to a more optimal node"); + } + catch (...) 
+ { + LOG_WARNING(log, "Failed to connect to a more optimal ZooKeeper, will try again later: {}", DB::getCurrentExceptionMessage(/*with_stacktrace*/ false)); + (*reconnect_task)->scheduleAfter(getSecondsUntilReconnect(args) * 1000); + } + }); + reconnect_task = std::make_unique(std::move(reconnect_task_holder)); + (*reconnect_task)->activate(); + (*reconnect_task)->scheduleAfter(reconnect_timeout_sec * 1000); + } } else if (args.implementation == "testkeeper") { @@ -152,29 +205,53 @@ void ZooKeeper::init(ZooKeeperArgs args_) } } +ZooKeeper::~ZooKeeper() +{ + if (reconnect_task) + (*reconnect_task)->deactivate(); +} ZooKeeper::ZooKeeper(const ZooKeeperArgs & args_, std::shared_ptr zk_log_) : zk_log(std::move(zk_log_)) { - init(args_); + init(args_, /*existing_impl*/ {}); +} + + +ZooKeeper::ZooKeeper(const ZooKeeperArgs & args_, std::shared_ptr zk_log_, Strings availability_zones_, std::unique_ptr existing_impl) + : availability_zones(std::move(availability_zones_)), zk_log(std::move(zk_log_)) +{ + if (availability_zones.size() != args_.hosts.size()) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Argument sizes mismatch: availability_zones count {} and hosts count {}", + availability_zones.size(), args_.hosts.size()); + init(args_, std::move(existing_impl)); } ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr zk_log_) : zk_log(std::move(zk_log_)) { - init(ZooKeeperArgs(config, config_name)); + init(ZooKeeperArgs(config, config_name), /*existing_impl*/ {}); } -std::vector ZooKeeper::shuffleHosts() const +ShuffleHosts ZooKeeper::shuffleHosts() const { - std::function get_priority = args.get_priority_load_balancing.getPriorityFunc(args.get_priority_load_balancing.load_balancing, 0, args.hosts.size()); - std::vector shuffle_hosts; + std::function get_priority = args.get_priority_load_balancing.getPriorityFunc( + args.get_priority_load_balancing.load_balancing, /* offset for first_or_random */ 0, args.hosts.size()); + ShuffleHosts shuffle_hosts; for (size_t i = 0; i < args.hosts.size(); ++i) { ShuffleHost shuffle_host; shuffle_host.host = args.hosts[i]; shuffle_host.original_index = static_cast(i); + + shuffle_host.secure = startsWith(shuffle_host.host, "secure://"); + if (shuffle_host.secure) + shuffle_host.host.erase(0, strlen("secure://")); + + if (!args.client_availability_zone.empty() && !availability_zones[i].empty()) + shuffle_host.az_info = availability_zones[i] == args.client_availability_zone ? 
ShuffleHost::SAME : ShuffleHost::OTHER; + if (get_priority) shuffle_host.priority = get_priority(i); shuffle_host.randomize(); @@ -1023,7 +1100,10 @@ ZooKeeperPtr ZooKeeper::create(const Poco::Util::AbstractConfiguration & config, ZooKeeperPtr ZooKeeper::startNewSession() const { - auto res = std::shared_ptr(new ZooKeeper(args, zk_log)); + if (reconnect_task) + (*reconnect_task)->deactivate(); + + auto res = std::shared_ptr(new ZooKeeper(args, zk_log, availability_zones, std::move(optimal_impl))); res->initSession(); return res; } @@ -1456,6 +1536,16 @@ int32_t ZooKeeper::getConnectionXid() const return impl->getConnectionXid(); } +String ZooKeeper::getConnectedHostAvailabilityZone() const +{ + if (args.implementation != "zookeeper" || !impl) + return ""; + Int8 idx = impl->getConnectedNodeIdx(); + if (idx < 0) + return ""; /// session expired + return availability_zones.at(idx); +} + size_t getFailedOpIndex(Coordination::Error exception_code, const Coordination::Responses & responses) { if (responses.empty()) diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 08ff60a80cf..4ae2cfa6096 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -32,6 +32,7 @@ namespace DB { class ZooKeeperLog; class ZooKeeperWithFaultInjection; +class BackgroundSchedulePoolTaskHolder; namespace ErrorCodes { @@ -48,11 +49,23 @@ constexpr size_t MULTI_BATCH_SIZE = 100; struct ShuffleHost { + enum AvailabilityZoneInfo + { + SAME = 0, + UNKNOWN = 1, + OTHER = 2, + }; + String host; + bool secure = false; UInt8 original_index = 0; + AvailabilityZoneInfo az_info = UNKNOWN; Priority priority; UInt64 random = 0; + /// We should resolve it each time without caching + mutable std::optional address; + void randomize() { random = thread_local_rng(); @@ -60,11 +73,13 @@ struct ShuffleHost static bool compare(const ShuffleHost & lhs, const ShuffleHost & rhs) { - return std::forward_as_tuple(lhs.priority, lhs.random) - < std::forward_as_tuple(rhs.priority, rhs.random); + return std::forward_as_tuple(lhs.az_info, lhs.priority, lhs.random) + < std::forward_as_tuple(rhs.az_info, rhs.priority, rhs.random); } }; +using ShuffleHosts = std::vector; + struct RemoveException { explicit RemoveException(std::string_view path_ = "", bool remove_subtree_ = true) @@ -197,6 +212,9 @@ class ZooKeeper explicit ZooKeeper(const ZooKeeperArgs & args_, std::shared_ptr zk_log_ = nullptr); + /// Allows to keep info about availability zones when starting a new session + ZooKeeper(const ZooKeeperArgs & args_, std::shared_ptr zk_log_, Strings availability_zones_, std::unique_ptr existing_impl); + /** Config of the form: @@ -228,7 +246,9 @@ public: using Ptr = std::shared_ptr; using ErrorsList = std::initializer_list; - std::vector shuffleHosts() const; + ~ZooKeeper(); + + ShuffleHosts shuffleHosts() const; static Ptr create(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, @@ -596,8 +616,6 @@ public: UInt32 getSessionUptime() const { return static_cast(session_uptime.elapsedSeconds()); } - bool hasReachedDeadline() const { return impl->hasReachedDeadline(); } - uint64_t getSessionTimeoutMS() const { return args.session_timeout_ms; } void setServerCompletelyStarted(); @@ -606,6 +624,8 @@ public: String getConnectedHostPort() const; int32_t getConnectionXid() const; + String getConnectedHostAvailabilityZone() const; + const DB::KeeperFeatureFlags * getKeeperFeatureFlags() const { return impl->getKeeperFeatureFlags(); } /// Checks that our session was not 
killed, and allows to avoid applying a request from an old lost session. @@ -625,7 +645,8 @@ public: void addCheckSessionOp(Coordination::Requests & requests) const; private: - void init(ZooKeeperArgs args_); + void init(ZooKeeperArgs args_, std::unique_ptr existing_impl); + void updateAvailabilityZones(); /// The following methods don't any throw exceptions but return error codes. Coordination::Error createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created); @@ -690,15 +711,20 @@ private: } std::unique_ptr impl; + mutable std::unique_ptr optimal_impl; ZooKeeperArgs args; + Strings availability_zones; + LoggerPtr log = nullptr; std::shared_ptr zk_log; AtomicStopwatch session_uptime; int32_t session_node_version; + + std::unique_ptr reconnect_task; }; diff --git a/src/Common/ZooKeeper/ZooKeeperArgs.cpp b/src/Common/ZooKeeper/ZooKeeperArgs.cpp index a581b6a7f38..18dff779a70 100644 --- a/src/Common/ZooKeeper/ZooKeeperArgs.cpp +++ b/src/Common/ZooKeeper/ZooKeeperArgs.cpp @@ -5,6 +5,9 @@ #include #include #include +#include +#include +#include #include namespace DB @@ -53,6 +56,7 @@ ZooKeeperArgs::ZooKeeperArgs(const Poco::Util::AbstractConfiguration & config, c ZooKeeperArgs::ZooKeeperArgs(const String & hosts_string) { splitInto<','>(hosts, hosts_string); + availability_zones.resize(hosts.size()); } void ZooKeeperArgs::initFromKeeperServerSection(const Poco::Util::AbstractConfiguration & config) @@ -103,8 +107,11 @@ void ZooKeeperArgs::initFromKeeperServerSection(const Poco::Util::AbstractConfig for (const auto & key : keys) { if (startsWith(key, "server")) + { hosts.push_back( (secure ? "secure://" : "") + config.getString(raft_configuration_key + "." + key + ".hostname") + ":" + tcp_port); + availability_zones.push_back(config.getString(raft_configuration_key + "." + key + ".availability_zone", "")); + } } static constexpr std::array load_balancing_keys @@ -123,11 +130,15 @@ void ZooKeeperArgs::initFromKeeperServerSection(const Poco::Util::AbstractConfig auto load_balancing = magic_enum::enum_cast(Poco::toUpper(load_balancing_str)); if (!load_balancing) throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unknown load balancing: {}", load_balancing_str); - get_priority_load_balancing.load_balancing = *load_balancing; + get_priority_load_balancing = DB::GetPriorityForLoadBalancing(*load_balancing, thread_local_rng() % hosts.size()); break; } } + availability_zone_autodetect = config.getBool(std::string{config_name} + ".availability_zone_autodetect", false); + prefer_local_availability_zone = config.getBool(std::string{config_name} + ".prefer_local_availability_zone", false); + if (prefer_local_availability_zone) + client_availability_zone = DB::PlacementInfo::PlacementInfo::instance().getAvailabilityZone(); } void ZooKeeperArgs::initFromKeeperSection(const Poco::Util::AbstractConfiguration & config, const std::string & config_name) @@ -137,6 +148,8 @@ void ZooKeeperArgs::initFromKeeperSection(const Poco::Util::AbstractConfiguratio Poco::Util::AbstractConfiguration::Keys keys; config.keys(config_name, keys); + std::optional load_balancing; + for (const auto & key : keys) { if (key.starts_with("node")) @@ -144,6 +157,7 @@ void ZooKeeperArgs::initFromKeeperSection(const Poco::Util::AbstractConfiguratio hosts.push_back( (config.getBool(config_name + "." + key + ".secure", false) ? "secure://" : "") + config.getString(config_name + "." + key + ".host") + ":" + config.getString(config_name + "." 
+ key + ".port", "2181")); + availability_zones.push_back(config.getString(config_name + "." + key + ".availability_zone", "")); } else if (key == "session_timeout_ms") { @@ -199,6 +213,10 @@ void ZooKeeperArgs::initFromKeeperSection(const Poco::Util::AbstractConfiguratio { sessions_path = config.getString(config_name + "." + key); } + else if (key == "prefer_local_availability_zone") + { + prefer_local_availability_zone = config.getBool(config_name + "." + key); + } else if (key == "implementation") { implementation = config.getString(config_name + "." + key); @@ -207,10 +225,9 @@ void ZooKeeperArgs::initFromKeeperSection(const Poco::Util::AbstractConfiguratio { String load_balancing_str = config.getString(config_name + "." + key); /// Use magic_enum to avoid dependency from dbms (`SettingFieldLoadBalancingTraits::fromString(...)`) - auto load_balancing = magic_enum::enum_cast(Poco::toUpper(load_balancing_str)); + load_balancing = magic_enum::enum_cast(Poco::toUpper(load_balancing_str)); if (!load_balancing) throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unknown load balancing: {}", load_balancing_str); - get_priority_load_balancing.load_balancing = *load_balancing; } else if (key == "fallback_session_lifetime") { @@ -224,9 +241,19 @@ void ZooKeeperArgs::initFromKeeperSection(const Poco::Util::AbstractConfiguratio { use_compression = config.getBool(config_name + "." + key); } + else if (key == "availability_zone_autodetect") + { + availability_zone_autodetect = config.getBool(config_name + "." + key); + } else throw KeeperException(Coordination::Error::ZBADARGUMENTS, "Unknown key {} in config file", key); } + + if (load_balancing) + get_priority_load_balancing = DB::GetPriorityForLoadBalancing(*load_balancing, thread_local_rng() % hosts.size()); + + if (prefer_local_availability_zone) + client_availability_zone = DB::PlacementInfo::PlacementInfo::instance().getAvailabilityZone(); } } diff --git a/src/Common/ZooKeeper/ZooKeeperArgs.h b/src/Common/ZooKeeper/ZooKeeperArgs.h index 27ba173c0c3..945b77bf9c1 100644 --- a/src/Common/ZooKeeper/ZooKeeperArgs.h +++ b/src/Common/ZooKeeper/ZooKeeperArgs.h @@ -32,10 +32,12 @@ struct ZooKeeperArgs String zookeeper_name = "zookeeper"; String implementation = "zookeeper"; Strings hosts; + Strings availability_zones; String auth_scheme; String identity; String chroot; String sessions_path = "/clickhouse/sessions"; + String client_availability_zone; int32_t connection_timeout_ms = Coordination::DEFAULT_CONNECTION_TIMEOUT_MS; int32_t session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS; int32_t operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS; @@ -47,6 +49,8 @@ struct ZooKeeperArgs UInt64 send_sleep_ms = 0; UInt64 recv_sleep_ms = 0; bool use_compression = false; + bool prefer_local_availability_zone = false; + bool availability_zone_autodetect = false; SessionLifetimeConfiguration fallback_session_lifetime = {}; DB::GetPriorityForLoadBalancing get_priority_load_balancing; diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index ed7498b1ac9..8653af51308 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -23,6 +23,9 @@ #include #include +#include +#include + #include "Coordination/KeeperConstants.h" #include "config.h" @@ -338,7 +341,7 @@ ZooKeeper::~ZooKeeper() ZooKeeper::ZooKeeper( - const Nodes & nodes, + const zkutil::ShuffleHosts & nodes, const zkutil::ZooKeeperArgs & args_, std::shared_ptr zk_log_) : args(args_) @@ -426,7 +429,7 @@ 
ZooKeeper::ZooKeeper( void ZooKeeper::connect( - const Nodes & nodes, + const zkutil::ShuffleHosts & nodes, Poco::Timespan connection_timeout) { if (nodes.empty()) @@ -434,15 +437,51 @@ void ZooKeeper::connect( static constexpr size_t num_tries = 3; bool connected = false; + bool dns_error = false; + + size_t resolved_count = 0; + for (const auto & node : nodes) + { + try + { + const Poco::Net::SocketAddress host_socket_addr{node.host}; + LOG_TRACE(log, "Adding ZooKeeper host {} ({}), az: {}, priority: {}", node.host, host_socket_addr.toString(), node.az_info, node.priority); + node.address = host_socket_addr; + ++resolved_count; + } + catch (const Poco::Net::HostNotFoundException & e) + { + /// Most likely it's misconfiguration and wrong hostname was specified + LOG_ERROR(log, "Cannot use ZooKeeper host {}, reason: {}", node.host, e.displayText()); + } + catch (const Poco::Net::DNSException & e) + { + /// Most likely DNS is not available now + dns_error = true; + LOG_ERROR(log, "Cannot use ZooKeeper host {} due to DNS error: {}", node.host, e.displayText()); + } + } + + if (resolved_count == 0) + { + /// For DNS errors we throw exception with ZCONNECTIONLOSS code, so it will be considered as hardware error, not user error + if (dns_error) + throw zkutil::KeeperException::fromMessage( + Coordination::Error::ZCONNECTIONLOSS, "Cannot resolve any of provided ZooKeeper hosts due to DNS error"); + else + throw zkutil::KeeperException::fromMessage(Coordination::Error::ZCONNECTIONLOSS, "Cannot use any of provided ZooKeeper nodes"); + } WriteBufferFromOwnString fail_reasons; for (size_t try_no = 0; try_no < num_tries; ++try_no) { - for (size_t i = 0; i < nodes.size(); ++i) + for (const auto & node : nodes) { - const auto & node = nodes[i]; try { + if (!node.address) + continue; + /// Reset the state of previous attempt. if (node.secure) { @@ -458,7 +497,7 @@ void ZooKeeper::connect( socket = Poco::Net::StreamSocket(); } - socket.connect(node.address, connection_timeout); + socket.connect(*node.address, connection_timeout); socket_address = socket.peerAddress(); socket.setReceiveTimeout(args.operation_timeout_ms * 1000); @@ -498,27 +537,11 @@ void ZooKeeper::connect( } original_index = static_cast(node.original_index); - - if (i != 0) - { - std::uniform_int_distribution fallback_session_lifetime_distribution - { - args.fallback_session_lifetime.min_sec, - args.fallback_session_lifetime.max_sec, - }; - UInt32 session_lifetime_seconds = fallback_session_lifetime_distribution(thread_local_rng); - client_session_deadline = clock::now() + std::chrono::seconds(session_lifetime_seconds); - - LOG_DEBUG(log, "Connected to a suboptimal ZooKeeper host ({}, index {})." - " To preserve balance in ZooKeeper usage, this ZooKeeper session will expire in {} seconds", - node.address.toString(), i, session_lifetime_seconds); - } - break; } catch (...) 
{ - fail_reasons << "\n" << getCurrentExceptionMessage(false) << ", " << node.address.toString(); + fail_reasons << "\n" << getCurrentExceptionMessage(false) << ", " << node.address->toString(); } } @@ -532,6 +555,9 @@ void ZooKeeper::connect( bool first = true; for (const auto & node : nodes) { + if (!node.address) + continue; + if (first) first = false; else @@ -540,7 +566,7 @@ void ZooKeeper::connect( if (node.secure) message << "secure://"; - message << node.address.toString(); + message << node.address->toString(); } message << fail_reasons.str() << "\n"; @@ -1153,7 +1179,6 @@ void ZooKeeper::pushRequest(RequestInfo && info) { try { - checkSessionDeadline(); info.time = clock::now(); auto maybe_zk_log = std::atomic_load(&zk_log); if (maybe_zk_log) @@ -1201,44 +1226,44 @@ bool ZooKeeper::isFeatureEnabled(KeeperFeatureFlag feature_flag) const return keeper_feature_flags.isEnabled(feature_flag); } -void ZooKeeper::initFeatureFlags() +std::optional ZooKeeper::tryGetSystemZnode(const std::string & path, const std::string & description) { - const auto try_get = [&](const std::string & path, const std::string & description) -> std::optional + auto promise = std::make_shared>(); + auto future = promise->get_future(); + + auto callback = [promise](const Coordination::GetResponse & response) mutable { - auto promise = std::make_shared>(); - auto future = promise->get_future(); - - auto callback = [promise](const Coordination::GetResponse & response) mutable - { - promise->set_value(response); - }; - - get(path, std::move(callback), {}); - if (future.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) - throw Exception(Error::ZOPERATIONTIMEOUT, "Failed to get {}: timeout", description); - - auto response = future.get(); - - if (response.error == Coordination::Error::ZNONODE) - { - LOG_TRACE(log, "Failed to get {}", description); - return std::nullopt; - } - else if (response.error != Coordination::Error::ZOK) - { - throw Exception(response.error, "Failed to get {}", description); - } - - return std::move(response.data); + promise->set_value(response); }; - if (auto feature_flags = try_get(keeper_api_feature_flags_path, "feature flags"); feature_flags.has_value()) + get(path, std::move(callback), {}); + if (future.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) + throw Exception(Error::ZOPERATIONTIMEOUT, "Failed to get {}: timeout", description); + + auto response = future.get(); + + if (response.error == Coordination::Error::ZNONODE) + { + LOG_TRACE(log, "Failed to get {}", description); + return std::nullopt; + } + else if (response.error != Coordination::Error::ZOK) + { + throw Exception(response.error, "Failed to get {}", description); + } + + return std::move(response.data); +} + +void ZooKeeper::initFeatureFlags() +{ + if (auto feature_flags = tryGetSystemZnode(keeper_api_feature_flags_path, "feature flags"); feature_flags.has_value()) { keeper_feature_flags.setFeatureFlags(std::move(*feature_flags)); return; } - auto keeper_api_version_string = try_get(keeper_api_version_path, "API version"); + auto keeper_api_version_string = tryGetSystemZnode(keeper_api_version_path, "API version"); DB::KeeperApiVersion keeper_api_version{DB::KeeperApiVersion::ZOOKEEPER_COMPATIBLE}; @@ -1256,6 +1281,17 @@ void ZooKeeper::initFeatureFlags() keeper_feature_flags.fromApiVersion(keeper_api_version); } +String ZooKeeper::tryGetAvailabilityZone() +{ + auto res = tryGetSystemZnode(keeper_availability_zone_path, "availability 
zone"); + if (res) + { + LOG_TRACE(log, "Availability zone for ZooKeeper at {}: {}", getConnectedHostPort(), *res); + return *res; + } + return ""; +} + void ZooKeeper::executeGenericRequest( const ZooKeeperRequestPtr & request, @@ -1587,17 +1623,6 @@ void ZooKeeper::setupFaultDistributions() inject_setup.test_and_set(); } -void ZooKeeper::checkSessionDeadline() const -{ - if (unlikely(hasReachedDeadline())) - throw Exception::fromMessage(Error::ZSESSIONEXPIRED, "Session expired (force expiry client-side)"); -} - -bool ZooKeeper::hasReachedDeadline() const -{ - return client_session_deadline.has_value() && clock::now() >= client_session_deadline.value(); -} - void ZooKeeper::maybeInjectSendFault() { if (unlikely(inject_setup.test() && send_inject_fault && send_inject_fault.value()(thread_local_rng))) diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index 8fdf0f97d9d..0c88c35b381 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -102,21 +103,12 @@ using namespace DB; class ZooKeeper final : public IKeeper { public: - struct Node - { - Poco::Net::SocketAddress address; - UInt8 original_index; - bool secure; - }; - - using Nodes = std::vector; - /** Connection to nodes is performed in order. If you want, shuffle them manually. * Operation timeout couldn't be greater than session timeout. * Operation timeout applies independently for network read, network write, waiting for events and synchronization. */ ZooKeeper( - const Nodes & nodes, + const zkutil::ShuffleHosts & nodes, const zkutil::ZooKeeperArgs & args_, std::shared_ptr zk_log_); @@ -130,9 +122,7 @@ public: String getConnectedHostPort() const override { return (original_index == -1) ? "" : args.hosts[original_index]; } int32_t getConnectionXid() const override { return next_xid.load(); } - /// A ZooKeeper session can have an optional deadline set on it. - /// After it has been reached, the session needs to be finalized. - bool hasReachedDeadline() const override; + String tryGetAvailabilityZone() override; /// Useful to check owner of ephemeral node. 
int64_t getSessionID() const override { return session_id; } @@ -271,7 +261,6 @@ private: clock::time_point time; }; - std::optional client_session_deadline {}; using RequestsQueue = ConcurrentBoundedQueue; RequestsQueue requests_queue{1024}; @@ -316,7 +305,7 @@ private: LoggerPtr log; void connect( - const Nodes & node, + const zkutil::ShuffleHosts & node, Poco::Timespan connection_timeout); void sendHandshake(); @@ -346,9 +335,10 @@ private: void logOperationIfNeeded(const ZooKeeperRequestPtr & request, const ZooKeeperResponsePtr & response = nullptr, bool finalize = false, UInt64 elapsed_microseconds = 0); + std::optional tryGetSystemZnode(const std::string & path, const std::string & description); + void initFeatureFlags(); - void checkSessionDeadline() const; CurrentMetrics::Increment active_session_metric_increment{CurrentMetrics::ZooKeeperSession}; std::shared_ptr zk_log; diff --git a/src/Common/ZooKeeper/examples/CMakeLists.txt b/src/Common/ZooKeeper/examples/CMakeLists.txt index 678b302a512..11669d765f7 100644 --- a/src/Common/ZooKeeper/examples/CMakeLists.txt +++ b/src/Common/ZooKeeper/examples/CMakeLists.txt @@ -1,15 +1,18 @@ clickhouse_add_executable(zkutil_test_commands zkutil_test_commands.cpp) target_link_libraries(zkutil_test_commands PRIVATE clickhouse_common_zookeeper_no_log + clickhouse_functions dbms) clickhouse_add_executable(zkutil_test_commands_new_lib zkutil_test_commands_new_lib.cpp) target_link_libraries(zkutil_test_commands_new_lib PRIVATE clickhouse_common_zookeeper_no_log clickhouse_compression + clickhouse_functions dbms) clickhouse_add_executable(zkutil_test_async zkutil_test_async.cpp) target_link_libraries(zkutil_test_async PRIVATE clickhouse_common_zookeeper_no_log + clickhouse_functions dbms) diff --git a/src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib.cpp b/src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib.cpp index 25d66b94b46..b3a1564b8ab 100644 --- a/src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib.cpp +++ b/src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib.cpp @@ -25,24 +25,24 @@ try Poco::Logger::root().setChannel(channel); Poco::Logger::root().setLevel("trace"); - std::string hosts_arg = argv[1]; - std::vector hosts_strings; - splitInto<','>(hosts_strings, hosts_arg); - ZooKeeper::Nodes nodes; - nodes.reserve(hosts_strings.size()); - for (size_t i = 0; i < hosts_strings.size(); ++i) + zkutil::ZooKeeperArgs args{argv[1]}; + zkutil::ShuffleHosts nodes; + nodes.reserve(args.hosts.size()); + for (size_t i = 0; i < args.hosts.size(); ++i) { - std::string host_string = hosts_strings[i]; - bool secure = startsWith(host_string, "secure://"); + zkutil::ShuffleHost node; + std::string host_string = args.hosts[i]; + node.secure = startsWith(host_string, "secure://"); - if (secure) + if (node.secure) host_string.erase(0, strlen("secure://")); - nodes.emplace_back(ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, static_cast(i) , secure}); + node.host = host_string; + node.original_index = i; + + nodes.emplace_back(node); } - - zkutil::ZooKeeperArgs args; ZooKeeper zk(nodes, args, nullptr); Poco::Event event(true); diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index 73e1396fb35..410576c2b4a 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -11,10 +11,10 @@ clickhouse_add_executable (small_table small_table.cpp) target_link_libraries (small_table PRIVATE clickhouse_common_io) clickhouse_add_executable (parallel_aggregation 
parallel_aggregation.cpp) -target_link_libraries (parallel_aggregation PRIVATE dbms) +target_link_libraries (parallel_aggregation PRIVATE dbms clickhouse_functions) clickhouse_add_executable (parallel_aggregation2 parallel_aggregation2.cpp) -target_link_libraries (parallel_aggregation2 PRIVATE dbms) +target_link_libraries (parallel_aggregation2 PRIVATE dbms clickhouse_functions) clickhouse_add_executable (int_hashes_perf int_hashes_perf.cpp) target_link_libraries (int_hashes_perf PRIVATE clickhouse_common_io) @@ -85,7 +85,7 @@ target_link_libraries (interval_tree PRIVATE dbms) if (ENABLE_SSL) clickhouse_add_executable (encrypt_decrypt encrypt_decrypt.cpp) - target_link_libraries (encrypt_decrypt PRIVATE dbms) + target_link_libraries (encrypt_decrypt PRIVATE dbms clickhouse_functions) endif() clickhouse_add_executable (check_pointer_valid check_pointer_valid.cpp) diff --git a/src/Compression/CompressedWriteBuffer.cpp b/src/Compression/CompressedWriteBuffer.cpp index f16330332ab..83c9fbc9573 100644 --- a/src/Compression/CompressedWriteBuffer.cpp +++ b/src/Compression/CompressedWriteBuffer.cpp @@ -57,14 +57,16 @@ void CompressedWriteBuffer::nextImpl() } } -CompressedWriteBuffer::~CompressedWriteBuffer() -{ - finalize(); -} - CompressedWriteBuffer::CompressedWriteBuffer(WriteBuffer & out_, CompressionCodecPtr codec_, size_t buf_size) : BufferWithOwnMemory(buf_size), out(out_), codec(std::move(codec_)) { } +CompressedWriteBuffer::~CompressedWriteBuffer() +{ + if (!canceled) + finalize(); +} + + } diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index ad6f95b3902..9607c345a3b 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -808,7 +808,11 @@ void LogEntryStorage::startCommitLogsPrefetch(uint64_t last_committed_index) con for (; current_index <= max_index_for_prefetch; ++current_index) { - const auto & [changelog_description, position, size] = logs_location.at(current_index); + auto location_it = logs_location.find(current_index); + if (location_it == logs_location.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Location of log entry with index {} is missing", current_index); + + const auto & [changelog_description, position, size] = location_it->second; if (total_size == 0) current_file_info = &file_infos.emplace_back(changelog_description, position, /* count */ 1); else if (total_size + size > commit_logs_cache.size_threshold) @@ -1416,7 +1420,11 @@ LogEntriesPtr LogEntryStorage::getLogEntriesBetween(uint64_t start, uint64_t end } else { - const auto & log_location = logs_location.at(i); + auto location_it = logs_location.find(i); + if (location_it == logs_location.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Location of log entry with index {} is missing", i); + + const auto & log_location = location_it->second; if (!read_info) set_new_file(log_location); diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 02b05c6346c..d94c58f072c 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -7,11 +7,12 @@ #include #include #include +#include #include #include -#include #include #include +#include #include #include #include @@ -27,7 +28,7 @@ #include #include #include -#include +#include #pragma clang diagnostic ignored "-Wdeprecated-declarations" #include @@ -365,6 +366,8 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co LockMemoryExceptionInThread::removeUniqueLock(); }; + asio_opts.thread_pool_size_ = 
getNumberOfPhysicalCPUCores(); + if (state_manager->isSecure()) { #if USE_SSL diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 238ab07a276..6a1a238aa07 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -518,6 +518,13 @@ bool KeeperStorage::UncommittedState::hasACL(int64_t session_id, bool is_local, return check_auth(storage.session_and_auth[session_id]); } + /// we want to close the session and with that we will remove all the auth related to the session + if (closed_sessions.contains(session_id)) + return false; + + if (check_auth(storage.session_and_auth[session_id])) + return true; + // check if there are uncommitted const auto auth_it = session_and_auth.find(session_id); if (auth_it == session_and_auth.end()) @@ -588,6 +595,10 @@ void KeeperStorage::UncommittedState::applyDeltas(const std::list & new_d auto & uncommitted_auth = session_and_auth[auth_delta->session_id]; uncommitted_auth.push_back(std::pair{delta.zxid, auth_delta->auth_id}); } + else if (const auto * close_session_delta = std::get_if(&delta.operation)) + { + closed_sessions.insert(close_session_delta->session_id); + } } } @@ -688,6 +699,10 @@ void KeeperStorage::UncommittedState::rollback(std::list rollback_deltas) session_and_auth.erase(add_auth->session_id); } } + else if (auto * close_session = std::get_if(&delta.operation)) + { + closed_sessions.erase(close_session->session_id); + } } } @@ -894,6 +909,10 @@ Coordination::Error KeeperStorage::commit(std::list deltas) session_and_auth[operation.session_id].emplace_back(std::move(*operation.auth_id)); return Coordination::Error::ZOK; } + else if constexpr (std::same_as) + { + return Coordination::Error::ZOK; + } else { // shouldn't be called in any process functions @@ -1015,9 +1034,11 @@ struct KeeperStorageHeartbeatRequestProcessor final : public KeeperStorageReques { using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; Coordination::ZooKeeperResponsePtr - process(KeeperStorage & /* storage */, std::list /* deltas */) const override + process(KeeperStorage & storage, std::list deltas) const override { - return zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + response_ptr->error = storage.commit(std::move(deltas)); + return response_ptr; } }; @@ -2433,6 +2454,7 @@ void KeeperStorage::preprocessRequest( } } + new_deltas.emplace_back(transaction->zxid, CloseSessionDelta{session_id}); new_digest = calculateNodesDigest(new_digest, new_deltas); return; } diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index d72ae46dee2..18696115ccf 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -339,6 +339,11 @@ public: std::shared_ptr auth_id; }; + struct CloseSessionDelta + { + int64_t session_id; + }; + using Operation = std::variant< CreateNodeDelta, RemoveNodeDelta, @@ -348,7 +353,8 @@ public: AddAuthDelta, ErrorDelta, SubDeltaEnd, - FailedMultiDelta>; + FailedMultiDelta, + CloseSessionDelta>; struct Delta { @@ -386,6 +392,7 @@ public: std::shared_ptr tryGetNodeFromStorage(StringRef path) const; std::unordered_map>>> session_and_auth; + std::unordered_set closed_sessions; struct UncommittedNode { diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index d314757efc9..a329bec8e2a 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -2019,6 
+2019,186 @@ TEST_P(CoordinationTest, TestCreateNodeWithAuthSchemeForAclWhenAuthIsPrecommitte EXPECT_EQ(acls[0].permissions, 31); } +TEST_P(CoordinationTest, TestPreprocessWhenCloseSessionIsPrecommitted) +{ + using namespace Coordination; + using namespace DB; + + ChangelogDirTest snapshots("./snapshots"); + setSnapshotDirectory("./snapshots"); + ResponsesQueue queue(std::numeric_limits::max()); + SnapshotsQueue snapshots_queue{1}; + int64_t session_without_auth = 1; + int64_t session_with_auth = 2; + size_t term = 0; + + auto state_machine = std::make_shared(queue, snapshots_queue, keeper_context, nullptr); + state_machine->init(); + + auto & storage = state_machine->getStorageUnsafe(); + const auto & uncommitted_state = storage.uncommitted_state; + + auto auth_req = std::make_shared(); + auth_req->scheme = "digest"; + auth_req->data = "test_user:test_password"; + + // Add auth data to the session + auto auth_entry = getLogEntryFromZKRequest(term, session_with_auth, state_machine->getNextZxid(), auth_req); + state_machine->pre_commit(1, auth_entry->get_buf()); + state_machine->commit(1, auth_entry->get_buf()); + + std::string node_without_acl = "/node_without_acl"; + { + auto create_req = std::make_shared(); + create_req->path = node_without_acl; + create_req->data = "notmodified"; + auto create_entry = getLogEntryFromZKRequest(term, session_with_auth, state_machine->getNextZxid(), create_req); + state_machine->pre_commit(2, create_entry->get_buf()); + state_machine->commit(2, create_entry->get_buf()); + ASSERT_TRUE(storage.container.contains(node_without_acl)); + } + + std::string node_with_acl = "/node_with_acl"; + { + auto create_req = std::make_shared(); + create_req->path = node_with_acl; + create_req->data = "notmodified"; + create_req->acls = {{.permissions = ACL::All, .scheme = "auth", .id = ""}}; + auto create_entry = getLogEntryFromZKRequest(term, session_with_auth, state_machine->getNextZxid(), create_req); + state_machine->pre_commit(3, create_entry->get_buf()); + state_machine->commit(3, create_entry->get_buf()); + ASSERT_TRUE(storage.container.contains(node_with_acl)); + } + + auto set_req_with_acl = std::make_shared(); + set_req_with_acl->path = node_with_acl; + set_req_with_acl->data = "modified"; + + auto set_req_without_acl = std::make_shared(); + set_req_without_acl->path = node_without_acl; + set_req_without_acl->data = "modified"; + + const auto reset_node_value + = [&](const auto & path) { storage.container.updateValue(path, [](auto & node) { node.setData("notmodified"); }); }; + + auto close_req = std::make_shared(); + + { + SCOPED_TRACE("Session with Auth"); + + // test we can modify both nodes + auto set_entry = getLogEntryFromZKRequest(term, session_with_auth, state_machine->getNextZxid(), set_req_with_acl); + state_machine->pre_commit(5, set_entry->get_buf()); + state_machine->commit(5, set_entry->get_buf()); + ASSERT_TRUE(storage.container.find(node_with_acl)->value.getData() == "modified"); + reset_node_value(node_with_acl); + + set_entry = getLogEntryFromZKRequest(term, session_with_auth, state_machine->getNextZxid(), set_req_without_acl); + state_machine->pre_commit(6, set_entry->get_buf()); + state_machine->commit(6, set_entry->get_buf()); + ASSERT_TRUE(storage.container.find(node_without_acl)->value.getData() == "modified"); + reset_node_value(node_without_acl); + + auto close_entry = getLogEntryFromZKRequest(term, session_with_auth, state_machine->getNextZxid(), close_req); + + // Pre-commit close session + state_machine->pre_commit(7, 
close_entry->get_buf()); + + /// will be rejected because we don't have required auth + auto set_entry_with_acl = getLogEntryFromZKRequest(term, session_with_auth, state_machine->getNextZxid(), set_req_with_acl); + state_machine->pre_commit(8, set_entry_with_acl->get_buf()); + + /// will be accepted because no ACL + auto set_entry_without_acl = getLogEntryFromZKRequest(term, session_with_auth, state_machine->getNextZxid(), set_req_without_acl); + state_machine->pre_commit(9, set_entry_without_acl->get_buf()); + + ASSERT_TRUE(uncommitted_state.getNode(node_with_acl)->getData() == "notmodified"); + ASSERT_TRUE(uncommitted_state.getNode(node_without_acl)->getData() == "modified"); + + state_machine->rollback(9, set_entry_without_acl->get_buf()); + state_machine->rollback(8, set_entry_with_acl->get_buf()); + + // let's commit close and verify we get same outcome + state_machine->commit(7, close_entry->get_buf()); + + /// will be rejected because we don't have required auth + set_entry_with_acl = getLogEntryFromZKRequest(term, session_with_auth, state_machine->getNextZxid(), set_req_with_acl); + state_machine->pre_commit(8, set_entry_with_acl->get_buf()); + + /// will be accepted because no ACL + set_entry_without_acl = getLogEntryFromZKRequest(term, session_with_auth, state_machine->getNextZxid(), set_req_without_acl); + state_machine->pre_commit(9, set_entry_without_acl->get_buf()); + + ASSERT_TRUE(uncommitted_state.getNode(node_with_acl)->getData() == "notmodified"); + ASSERT_TRUE(uncommitted_state.getNode(node_without_acl)->getData() == "modified"); + + state_machine->commit(8, set_entry_with_acl->get_buf()); + state_machine->commit(9, set_entry_without_acl->get_buf()); + + ASSERT_TRUE(storage.container.find(node_with_acl)->value.getData() == "notmodified"); + ASSERT_TRUE(storage.container.find(node_without_acl)->value.getData() == "modified"); + + reset_node_value(node_without_acl); + } + + { + SCOPED_TRACE("Session without Auth"); + + // test we can modify only node without acl + auto set_entry = getLogEntryFromZKRequest(term, session_without_auth, state_machine->getNextZxid(), set_req_with_acl); + state_machine->pre_commit(10, set_entry->get_buf()); + state_machine->commit(10, set_entry->get_buf()); + ASSERT_TRUE(storage.container.find(node_with_acl)->value.getData() == "notmodified"); + + set_entry = getLogEntryFromZKRequest(term, session_without_auth, state_machine->getNextZxid(), set_req_without_acl); + state_machine->pre_commit(11, set_entry->get_buf()); + state_machine->commit(11, set_entry->get_buf()); + ASSERT_TRUE(storage.container.find(node_without_acl)->value.getData() == "modified"); + reset_node_value(node_without_acl); + + auto close_entry = getLogEntryFromZKRequest(term, session_without_auth, state_machine->getNextZxid(), close_req); + + // Pre-commit close session + state_machine->pre_commit(12, close_entry->get_buf()); + + /// will be rejected because we don't have required auth + auto set_entry_with_acl = getLogEntryFromZKRequest(term, session_without_auth, state_machine->getNextZxid(), set_req_with_acl); + state_machine->pre_commit(13, set_entry_with_acl->get_buf()); + + /// will be accepted because no ACL + auto set_entry_without_acl = getLogEntryFromZKRequest(term, session_without_auth, state_machine->getNextZxid(), set_req_without_acl); + state_machine->pre_commit(14, set_entry_without_acl->get_buf()); + + ASSERT_TRUE(uncommitted_state.getNode(node_with_acl)->getData() == "notmodified"); + ASSERT_TRUE(uncommitted_state.getNode(node_without_acl)->getData() == 
"modified"); + + state_machine->rollback(14, set_entry_without_acl->get_buf()); + state_machine->rollback(13, set_entry_with_acl->get_buf()); + + // let's commit close and verify we get same outcome + state_machine->commit(12, close_entry->get_buf()); + + /// will be rejected because we don't have required auth + set_entry_with_acl = getLogEntryFromZKRequest(term, session_without_auth, state_machine->getNextZxid(), set_req_with_acl); + state_machine->pre_commit(13, set_entry_with_acl->get_buf()); + + /// will be accepted because no ACL + set_entry_without_acl = getLogEntryFromZKRequest(term, session_without_auth, state_machine->getNextZxid(), set_req_without_acl); + state_machine->pre_commit(14, set_entry_without_acl->get_buf()); + + ASSERT_TRUE(uncommitted_state.getNode(node_with_acl)->getData() == "notmodified"); + ASSERT_TRUE(uncommitted_state.getNode(node_without_acl)->getData() == "modified"); + + state_machine->commit(13, set_entry_with_acl->get_buf()); + state_machine->commit(14, set_entry_without_acl->get_buf()); + + ASSERT_TRUE(storage.container.find(node_with_acl)->value.getData() == "notmodified"); + ASSERT_TRUE(storage.container.find(node_without_acl)->value.getData() == "modified"); + + reset_node_value(node_without_acl); + } +} + TEST_P(CoordinationTest, TestSetACLWithAuthSchemeForAclWhenAuthIsPrecommitted) { using namespace Coordination; diff --git a/src/Core/Defines.h b/src/Core/Defines.h index b7675b55b87..6df335a9c8f 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -90,13 +90,13 @@ static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_POLICY = "SLRU"; static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE = 0_MiB; static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO = 0.5l; static constexpr auto DEFAULT_MARK_CACHE_POLICY = "SLRU"; -static constexpr auto DEFAULT_MARK_CACHE_MAX_SIZE = 5368_MiB; +static constexpr auto DEFAULT_MARK_CACHE_MAX_SIZE = 5_GiB; static constexpr auto DEFAULT_MARK_CACHE_SIZE_RATIO = 0.5l; static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY = "SLRU"; static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE = 0; static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO = 0.5; static constexpr auto DEFAULT_INDEX_MARK_CACHE_POLICY = "SLRU"; -static constexpr auto DEFAULT_INDEX_MARK_CACHE_MAX_SIZE = 5368_MiB; +static constexpr auto DEFAULT_INDEX_MARK_CACHE_MAX_SIZE = 5_GiB; static constexpr auto DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO = 0.3; static constexpr auto DEFAULT_MMAP_CACHE_MAX_SIZE = 1_KiB; /// chosen by rolling dice static constexpr auto DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE = 128_MiB; diff --git a/src/Core/Protocol.h b/src/Core/Protocol.h index 3fc9e089451..4c0848c0706 100644 --- a/src/Core/Protocol.h +++ b/src/Core/Protocol.h @@ -63,6 +63,9 @@ const char USER_INTERSERVER_MARKER[] = " INTERSERVER SECRET "; /// Marker for SSH-keys-based authentication (passed as the user name) const char SSH_KEY_AUTHENTICAION_MARKER[] = " SSH KEY AUTHENTICATION "; +/// Market for JSON Web Token authentication +const char JWT_AUTHENTICAION_MARKER[] = " JWT AUTHENTICATION "; + }; namespace Protocol diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 309becdd78f..68ac45fa24f 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -14,6 +14,7 @@ class AbstractConfiguration; namespace DB { +// clang-format off #define SERVER_SETTINGS(M, ALIAS) \ M(Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0) \ M(Bool, 
shutdown_wait_unfinished_queries, false, "If set true ClickHouse will wait for running queries finish before shutdown.", 0) \ @@ -85,10 +86,12 @@ namespace DB M(Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index mark cache relative to the cache's total size.", 0) \ M(UInt64, page_cache_chunk_size, 2 << 20, "Bytes per chunk in userspace page cache. Rounded up to a multiple of page size (typically 4 KiB) or huge page size (typically 2 MiB, only if page_cache_use_thp is enabled).", 0) \ M(UInt64, page_cache_mmap_size, 1 << 30, "Bytes per memory mapping in userspace page cache. Not important.", 0) \ - M(UInt64, page_cache_size, 10ul << 30, "Amount of virtual memory to map for userspace page cache. If page_cache_use_madv_free is enabled, it's recommended to set this higher than the machine's RAM size. Use 0 to disable userspace page cache.", 0) \ + M(UInt64, page_cache_size, 0, "Amount of virtual memory to map for userspace page cache. If page_cache_use_madv_free is enabled, it's recommended to set this higher than the machine's RAM size. Use 0 to disable userspace page cache.", 0) \ M(Bool, page_cache_use_madv_free, DBMS_DEFAULT_PAGE_CACHE_USE_MADV_FREE, "If true, the userspace page cache will allow the OS to automatically reclaim memory from the cache on memory pressure (using MADV_FREE).", 0) \ M(Bool, page_cache_use_transparent_huge_pages, true, "Userspace will attempt to use transparent huge pages on Linux. This is best-effort.", 0) \ M(UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0) \ + M(UInt64, compiled_expression_cache_size, DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE, "Byte size of compiled expressions cache.", 0) \ + M(UInt64, compiled_expression_cache_elements_size, DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES, "Maximum entries in compiled expressions cache.", 0) \ \ M(Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0) \ M(UInt64, dns_cache_max_entries, 10000, "Internal DNS cache max entries.", 0) \ @@ -102,6 +105,8 @@ namespace DB M(UInt64, max_dictionary_num_to_warn, 1000lu, "If the number of dictionaries is greater than this value, the server will create a warning that will displayed to user.", 0) \ M(UInt64, max_database_num_to_warn, 1000lu, "If the number of databases is greater than this value, the server will create a warning that will displayed to user.", 0) \ M(UInt64, max_part_num_to_warn, 100000lu, "If the number of parts is greater than this value, the server will create a warning that will displayed to user.", 0) \ + M(UInt64, max_table_num_to_throw, 0lu, "If number of tables is greater than this value, server will throw an exception. 0 means no limitation. View, remote tables, dictionary, system tables are not counted. Only count table in Atomic/Ordinary/Replicated/Lazy database engine.", 0) \ + M(UInt64, max_database_num_to_throw, 0lu, "If number of databases is greater than this value, server will throw an exception. 0 means no limitation.", 0) \ M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0) \ M(UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0) \ \ @@ -146,7 +151,10 @@ namespace DB M(UInt64, global_profiler_real_time_period_ns, 0, "Period for real clock timer of global profiler (in nanoseconds). 
Set 0 value to turn off the real clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(UInt64, global_profiler_cpu_time_period_ns, 0, "Period for CPU clock timer of global profiler (in nanoseconds). Set 0 value to turn off the CPU clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \ + M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \ + M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \ M(Double, gwp_asan_force_sample_probability, 0, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ + M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \ /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 8257b94cd9f..9c9c9c1db00 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -142,6 +142,7 @@ void Settings::applyCompatibilitySetting(const String & compatibility_value) return; ClickHouseVersion version(compatibility_value); + const auto & settings_changes_history = getSettingsChangesHistory(); /// Iterate through ClickHouse version in descending order and apply reversed /// changes for each version that is higher that version from compatibility setting for (auto it = settings_changes_history.rbegin(); it != settings_changes_history.rend(); ++it) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 6197a7cf6e1..14fe0924b40 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -31,6 +31,7 @@ class IColumn; * for tracking settings changes in different versions and for special `compatibility` setting to work correctly. */ +// clang-format off #define COMMON_SETTINGS(M, ALIAS) \ M(Dialect, dialect, Dialect::clickhouse, "Which dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ @@ -469,7 +470,7 @@ class IColumn; M(UInt64, max_rows_in_join, 0, "Maximum size of the hash table for JOIN (in number of rows).", 0) \ M(UInt64, max_bytes_in_join, 0, "Maximum size of the hash table for JOIN (in number of bytes in memory).", 0) \ M(OverflowMode, join_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \ - M(Bool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key.", IMPORTANT) \ + M(Bool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key. 
Can be applied only to hash join and storage join.", IMPORTANT) \ M(JoinAlgorithm, join_algorithm, JoinAlgorithm::DEFAULT, "Specify join algorithm.", 0) \ M(UInt64, cross_join_min_rows_to_compress, 10000000, "Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached.", 0) \ M(UInt64, cross_join_min_bytes_to_compress, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached.", 0) \ @@ -933,6 +934,7 @@ class IColumn; M(Int64, prefer_warmed_unmerged_parts_seconds, 0, "Only available in ClickHouse Cloud. If a merged part is less than this many seconds old and is not pre-warmed (see cache_populated_by_fetch), but all its source parts are available and pre-warmed, SELECT queries will read from those parts instead. Only for ReplicatedMergeTree. Note that this only checks whether CacheWarmer processed the part; if the part was fetched into cache by something else, it'll still be considered cold until CacheWarmer gets to it; if it was warmed, then evicted from cache, it'll still be considered warm.", 0) \ M(Bool, iceberg_engine_ignore_schema_evolution, false, "Ignore schema evolution in Iceberg table engine and read all data using latest schema saved on table creation. Note that it can lead to incorrect result", 0) \ M(Bool, allow_deprecated_error_prone_window_functions, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)", 0) \ + M(Bool, allow_deprecated_snowflake_conversion_functions, false, "Enables deprecated functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake.", 0) \ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS. @@ -1090,6 +1092,7 @@ class IColumn; M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \ M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \ M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \ + M(Bool, input_format_json_ignore_key_case, false, "Ignore json key case while read json field from string", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ @@ -1158,6 +1161,7 @@ class IColumn; M(Bool, output_format_parquet_parallel_encoding, true, "Do Parquet encoding in multiple threads. 
Requires output_format_parquet_use_custom_encoder.", 0) \ M(UInt64, output_format_parquet_data_page_size, 1024 * 1024, "Target page size in bytes, before compression.", 0) \ M(UInt64, output_format_parquet_batch_size, 1024, "Check page size every this many rows. Consider decreasing if you have columns with average values size above a few KBs.", 0) \ + M(Bool, output_format_parquet_write_page_index, true, "Add a possibility to write page index into parquet files.", 0) \ M(String, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy', 'zstd'.", 0) \ M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp new file mode 100644 index 00000000000..194a0024f2b --- /dev/null +++ b/src/Core/SettingsChangesHistory.cpp @@ -0,0 +1,323 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +} + +ClickHouseVersion::ClickHouseVersion(const String & version) +{ + Strings split; + boost::split(split, version, [](char c){ return c == '.'; }); + components.reserve(split.size()); + if (split.empty()) + throw Exception{ErrorCodes::BAD_ARGUMENTS, "Cannot parse ClickHouse version here: {}", version}; + + for (const auto & split_element : split) + { + size_t component; + ReadBufferFromString buf(split_element); + if (!tryReadIntText(component, buf) || !buf.eof()) + throw Exception{ErrorCodes::BAD_ARGUMENTS, "Cannot parse ClickHouse version here: {}", version}; + components.push_back(component); + } +} + +ClickHouseVersion::ClickHouseVersion(const char * version) + : ClickHouseVersion(String(version)) +{ +} + +String ClickHouseVersion::toString() const +{ + String version = std::to_string(components[0]); + for (size_t i = 1; i < components.size(); ++i) + version += "." + std::to_string(components[i]); + + return version; +} + +// clang-format off +/// History of settings changes that controls some backward incompatible changes +/// across all ClickHouse versions. It maps ClickHouse version to settings changes that were done +/// in this version. This history contains both changes to existing settings and newly added settings. +/// Settings changes is a vector of structs +/// {setting_name, previous_value, new_value, reason}. +/// For newly added setting choose the most appropriate previous_value (for example, if new setting +/// controls new feature and it's 'true' by default, use 'false' as previous_value). +/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) +/// Note: please check if the key already exists to prevent duplicate entries. 
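+/// For illustration only (hypothetical version and setting name), one entry of the initializer below looks like:
+///     {"24.8", {{"some_new_setting", false, true, "Enable some_new_setting by default"}}},
+/// i.e. the key is the ClickHouse version and the value is the list of {setting_name, previous_value, new_value, reason}
+/// changes made in that version.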
+static std::initializer_list> settings_changes_history_initializer = +{ + {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, + }}, + {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, + {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, + {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, + {"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"}, + {"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"}, + {"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"}, + {"allow_experimental_full_text_index", false, false, "Enable experimental full-text index"}, + {"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"}, + {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, + {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, + {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, + {"s3_max_part_number", 10000, 10000, "Maximum part number number for s3 upload part"}, + {"s3_max_single_operation_copy_size", 32 * 1024 * 1024, 32 * 1024 * 1024, "Maximum size for a single copy operation in s3"}, + {"input_format_parquet_max_block_size", 8192, DEFAULT_BLOCK_SIZE, "Increase block size for parquet reader."}, + {"input_format_parquet_prefer_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Average block bytes output by parquet reader."}, + {"enable_blob_storage_log", true, true, "Write information about blob storage operations to system.blob_storage_log table"}, + {"allow_deprecated_snowflake_conversion_functions", true, false, "Disabled deprecated functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake."}, + {"allow_statistic_optimize", false, false, "Old setting which popped up here being renamed."}, + {"allow_experimental_statistic", false, false, "Old setting which popped up here being renamed."}, + {"allow_statistics_optimize", false, false, "The setting was renamed. The previous name is `allow_statistic_optimize`."}, + {"allow_experimental_statistics", false, false, "The setting was renamed. The previous name is `allow_experimental_statistic`."}, + {"enable_vertical_final", false, true, "Enable vertical final by default again after fixing bug"}, + {"parallel_replicas_custom_key_range_lower", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards"}, + {"parallel_replicas_custom_key_range_upper", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards. A value of 0 disables the upper limit"}, + {"output_format_pretty_display_footer_column_names", 0, 1, "Add a setting to display column names in the footer if there are many rows. 
Threshold value is controlled by output_format_pretty_display_footer_column_names_min_rows."}, + {"output_format_pretty_display_footer_column_names_min_rows", 0, 50, "Add a setting to control the threshold value for setting output_format_pretty_display_footer_column_names_min_rows. Default 50."}, + {"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."}, + {"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."}, + {"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."}, + {"input_format_json_ignore_key_case", false, false, "Ignore json key case while read json field from string."}, + }}, + {"24.5", {{"allow_deprecated_error_prone_window_functions", true, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)"}, + {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."}, + {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, + {"output_format_parquet_use_custom_encoder", false, true, "Enable custom Parquet encoder."}, + {"cross_join_min_rows_to_compress", 0, 10000000, "Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, + {"cross_join_min_bytes_to_compress", 0, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, + {"http_max_chunk_size", 0, 0, "Internal limitation"}, + {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, + {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, + {"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, + {"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"}, + {"azure_max_blocks_in_multipart_upload", 50000, 50000, "Maximum number of blocks in multipart upload for Azure."}, + }}, + {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, + {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, + {"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"}, + {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"}, + {"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries against system tables"}, + {"input_format_json_ignore_unnecessary_fields", false, true, "Ignore unnecessary fields and not parse them. 
Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields"}, + {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, + {"allow_experimental_database_replicated", false, true, "Database engine Replicated is now in Beta stage"}, + {"temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds", (10 * 60 * 1000), (10 * 60 * 1000), "Wait time to lock cache for sapce reservation in temporary data in filesystem cache"}, + {"optimize_rewrite_sum_if_to_count_if", false, true, "Only available for the analyzer, where it works correctly"}, + {"azure_allow_parallel_part_upload", "true", "true", "Use multiple threads for azure multipart upload."}, + {"max_recursive_cte_evaluation_depth", DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, "Maximum limit on recursive CTE evaluation depth"}, + {"query_plan_convert_outer_join_to_inner_join", false, true, "Allow to convert OUTER JOIN to INNER JOIN if filter after JOIN always filters default values"}, + }}, + {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, + {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, + {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, + {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, + {"page_cache_inject_eviction", false, false, "Added userspace page cache"}, + {"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"}, + {"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"}, + {"traverse_shadow_remote_data_paths", false, false, "Traverse shadow directory when query system.remote_data_paths."}, + {"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication is dependent materialized view cannot work together with async inserts."}, + {"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"}, + {"log_processors_profiles", false, true, "Enable by default"}, + {"function_locate_has_mysql_compatible_argument_order", false, true, "Increase compatibility with MySQL's locate function."}, + {"allow_suspicious_primary_key", true, false, "Forbid suspicious PRIMARY KEY/ORDER BY for MergeTree (i.e. 
SimpleAggregateFunction)"}, + {"filesystem_cache_reserve_space_wait_lock_timeout_milliseconds", 1000, 1000, "Wait time to lock cache for sapce reservation in filesystem cache"}, + {"max_parser_backtracks", 0, 1000000, "Limiting the complexity of parsing"}, + {"analyzer_compatibility_join_using_top_level_identifier", false, false, "Force to resolve identifier in JOIN USING from projection"}, + {"distributed_insert_skip_read_only_replicas", false, false, "If true, INSERT into Distributed will skip read-only replicas"}, + {"keeper_max_retries", 10, 10, "Max retries for general keeper operations"}, + {"keeper_retry_initial_backoff_ms", 100, 100, "Initial backoff timeout for general keeper operations"}, + {"keeper_retry_max_backoff_ms", 5000, 5000, "Max backoff timeout for general keeper operations"}, + {"s3queue_allow_experimental_sharded_mode", false, false, "Enable experimental sharded mode of S3Queue table engine. It is experimental because it will be rewritten"}, + {"allow_experimental_analyzer", false, true, "Enable analyzer and planner by default."}, + {"merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability", 0.0, 0.0, "For testing of `PartsSplitter` - split read ranges into intersecting and non intersecting every time you read from MergeTree with the specified probability."}, + {"allow_get_client_http_header", false, false, "Introduced a new function."}, + {"output_format_pretty_row_numbers", false, true, "It is better for usability."}, + {"output_format_pretty_max_value_width_apply_for_single_value", true, false, "Single values in Pretty formats won't be cut."}, + {"output_format_parquet_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, + {"output_format_orc_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, + {"output_format_arrow_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, + {"output_format_parquet_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, + {"output_format_orc_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. 
Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, + {"output_format_pretty_highlight_digit_groups", false, true, "If enabled and if output is a terminal, highlight every digit corresponding to the number of thousands, millions, etc. with underline."}, + {"geo_distance_returns_float64_on_float64_arguments", false, true, "Increase the default precision."}, + {"azure_max_inflight_parts_for_one_file", 20, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited."}, + {"azure_strict_upload_part_size", 0, 0, "The exact size of part to upload during multipart upload to Azure blob storage."}, + {"azure_min_upload_part_size", 16*1024*1024, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage."}, + {"azure_max_upload_part_size", 5ull*1024*1024*1024, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage."}, + {"azure_upload_part_size_multiply_factor", 2, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage."}, + {"azure_upload_part_size_multiply_parts_count_threshold", 500, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor."}, + {"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."}, + {"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."}, + {"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."}, + }}, + {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, + {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, + {"output_format_values_escape_quote_with_quote", false, false, "If true escape ' with '', otherwise quoted with \\'"}, + {"output_format_pretty_single_large_number_tip_threshold", 0, 1'000'000, "Print a readable number tip on the right side of the table if the block consists of a single number which exceeds this value (except 0)"}, + {"input_format_try_infer_exponent_floats", true, false, "Don't infer floats in exponential notation by default"}, + {"query_plan_optimize_prewhere", true, true, "Allow to push down filter to PREWHERE expression for supported storages"}, + {"async_insert_max_data_size", 1000000, 10485760, "The previous value appeared to be too small."}, + {"async_insert_poll_timeout_ms", 10, 10, "Timeout in milliseconds for polling data from asynchronous insert queue"}, + {"async_insert_use_adaptive_busy_timeout", false, true, "Use adaptive asynchronous insert timeout"}, + {"async_insert_busy_timeout_min_ms", 50, 50, "The minimum value of the asynchronous insert timeout in milliseconds; it also serves as the initial value, which may be increased later by the adaptive algorithm"}, + {"async_insert_busy_timeout_max_ms", 200, 200, "The minimum value of the asynchronous insert timeout in milliseconds; async_insert_busy_timeout_ms is aliased to async_insert_busy_timeout_max_ms"}, + {"async_insert_busy_timeout_increase_rate", 0.2, 0.2, "The 
exponential growth rate at which the adaptive asynchronous insert timeout increases"}, + {"async_insert_busy_timeout_decrease_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout decreases"}, + {"format_template_row_format", "", "", "Template row format string can be set directly in query"}, + {"format_template_resultset_format", "", "", "Template result set format string can be set in query"}, + {"split_parts_ranges_into_intersecting_and_non_intersecting_final", true, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, + {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}, + {"azure_max_single_part_copy_size", 256*1024*1024, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage."}, + {"min_external_table_block_size_rows", DEFAULT_INSERT_BLOCK_SIZE, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to external table to specified size in rows, if blocks are not big enough"}, + {"min_external_table_block_size_bytes", DEFAULT_INSERT_BLOCK_SIZE * 256, DEFAULT_INSERT_BLOCK_SIZE * 256, "Squash blocks passed to external table to specified size in bytes, if blocks are not big enough."}, + {"parallel_replicas_prefer_local_join", true, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN."}, + {"optimize_time_filter_with_preimage", true, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')"}, + {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. 
Used as a safeguard against consuming too much memory."}, + {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, + {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, + {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, + {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + }}, + {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, + {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, + {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, + {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, + {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, + {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, + {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, + {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, + {"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"}, + {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, + {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, + {"enable_vertical_final", false, true, "Use vertical final by default"}, + {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, + {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, + {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, + {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, + {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, + {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, + {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, + {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, + {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, + {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL 
optimization"}, + {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, + {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, + {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, + {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, + {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, + {"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, + {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, + {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, + {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, + {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, + {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, + {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, + {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, + {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, + {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, + {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, + {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, + {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, + {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, + {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. 
Rows with different values in sorting prefix are filled independently"}, + {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, + {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, + {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, + {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, + {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, + {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}, + {"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}, + {"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, + {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, + {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, + {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, + {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, + {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, + {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, + {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, + {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, + {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, + {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, + {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, + {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, + {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, + {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, + {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, + {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, + {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, + {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. 
For example, sorting steps related to ORDER BY clauses in subqueries"}}}, + {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, + {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, + {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, + {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, + {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, + {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, + {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, + {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, + {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, + {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, + {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, + {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, + {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, + {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, + {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, + {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, + {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, + {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, + {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, + {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, + {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, + {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, + {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, + {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. 
It is significantly more convenient to use than sequential quorum inserts"}, + {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, + {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, + {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, + {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, + {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, + {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, + {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, + {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, + {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, + {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, + {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, + {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, + {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, +}; + + +const std::map & getSettingsChangesHistory() +{ + static std::map settings_changes_history; + + static std::once_flag initialized_flag; + std::call_once(initialized_flag, []() + { + for (const auto & setting_change : settings_changes_history_initializer) + { + /// Disallow duplicate keys in the settings changes history. Example: + /// {"21.2", {{"some_setting_1", false, true, "[...]"}}}, + /// [...] + /// {"21.2", {{"some_setting_2", false, true, "[...]"}}}, + /// As std::set has unique keys, one of the entries would be overwritten. 
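+            /// Throwing LOGICAL_ERROR here (see below) surfaces such a duplicate at the first call of getSettingsChangesHistory()
+            /// instead of silently dropping one of the two change lists while the map is being filled.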
+ if (settings_changes_history.contains(setting_change.first)) + throw Exception{ErrorCodes::LOGICAL_ERROR, "Detected duplicate version '{}'", setting_change.first.toString()}; + + settings_changes_history[setting_change.first] = setting_change.second; + } + }); + + return settings_changes_history; +} +} diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index eddf83f7912..b1a69c3b6d6 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -1,62 +1,25 @@ #pragma once #include -#include -#include -#include -#include #include +#include namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class ClickHouseVersion { public: - ClickHouseVersion(const String & version) /// NOLINT(google-explicit-constructor) - { - Strings split; - boost::split(split, version, [](char c){ return c == '.'; }); - components.reserve(split.size()); - if (split.empty()) - throw Exception{ErrorCodes::BAD_ARGUMENTS, "Cannot parse ClickHouse version here: {}", version}; + /// NOLINTBEGIN(google-explicit-constructor) + ClickHouseVersion(const String & version); + ClickHouseVersion(const char * version); + /// NOLINTEND(google-explicit-constructor) - for (const auto & split_element : split) - { - size_t component; - ReadBufferFromString buf(split_element); - if (!tryReadIntText(component, buf) || !buf.eof()) - throw Exception{ErrorCodes::BAD_ARGUMENTS, "Cannot parse ClickHouse version here: {}", version}; - components.push_back(component); - } - } + String toString() const; - ClickHouseVersion(const char * version) : ClickHouseVersion(String(version)) {} /// NOLINT(google-explicit-constructor) - - String toString() const - { - String version = std::to_string(components[0]); - for (size_t i = 1; i < components.size(); ++i) - version += "." + std::to_string(components[i]); - - return version; - } - - bool operator<(const ClickHouseVersion & other) const - { - return components < other.components; - } - - bool operator>=(const ClickHouseVersion & other) const - { - return components >= other.components; - } + bool operator<(const ClickHouseVersion & other) const { return components < other.components; } + bool operator>=(const ClickHouseVersion & other) const { return components >= other.components; } private: std::vector components; @@ -75,249 +38,6 @@ namespace SettingsChangesHistory using SettingsChanges = std::vector; } -/// History of settings changes that controls some backward incompatible changes -/// across all ClickHouse versions. It maps ClickHouse version to settings changes that were done -/// in this version. This history contains both changes to existing settings and newly added settings. -/// Settings changes is a vector of structs -/// {setting_name, previous_value, new_value, reason}. -/// For newly added setting choose the most appropriate previous_value (for example, if new setting -/// controls new feature and it's 'true' by default, use 'false' as previous_value). 
-/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) -static const std::map settings_changes_history = -{ - {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, - {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, - {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, - {"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"}, - {"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"}, - {"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"}, - {"allow_experimental_full_text_index", false, false, "Enable experimental full-text index"}, - {"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"}, - {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, - {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, - {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, - {"s3_max_part_number", 10000, 10000, "Maximum part number number for s3 upload part"}, - {"s3_max_single_operation_copy_size", 32 * 1024 * 1024, 32 * 1024 * 1024, "Maximum size for a single copy operation in s3"}, - {"input_format_parquet_max_block_size", 8192, DEFAULT_BLOCK_SIZE, "Increase block size for parquet reader."}, - {"input_format_parquet_prefer_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Average block bytes output by parquet reader."}, - {"enable_blob_storage_log", true, true, "Write information about blob storage operations to system.blob_storage_log table"}, - {"allow_statistic_optimize", false, false, "Old setting which popped up here being renamed."}, - {"allow_experimental_statistic", false, false, "Old setting which popped up here being renamed."}, - {"allow_statistics_optimize", false, false, "The setting was renamed. The previous name is `allow_statistic_optimize`."}, - {"allow_experimental_statistics", false, false, "The setting was renamed. The previous name is `allow_experimental_statistic`."}, - {"enable_vertical_final", false, true, "Enable vertical final by default again after fixing bug"}, - {"parallel_replicas_custom_key_range_lower", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards"}, - {"parallel_replicas_custom_key_range_upper", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards. A value of 0 disables the upper limit"}, - {"output_format_pretty_display_footer_column_names", 0, 1, "Add a setting to display column names in the footer if there are many rows. 
Threshold value is controlled by output_format_pretty_display_footer_column_names_min_rows."}, - {"output_format_pretty_display_footer_column_names_min_rows", 0, 50, "Add a setting to control the threshold value for setting output_format_pretty_display_footer_column_names_min_rows. Default 50."}, - {"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."}, - {"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."}, - {"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."}, - }}, - {"24.5", {{"allow_deprecated_error_prone_window_functions", true, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)"}, - {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."}, - {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, - {"output_format_parquet_use_custom_encoder", false, true, "Enable custom Parquet encoder."}, - {"cross_join_min_rows_to_compress", 0, 10000000, "Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, - {"cross_join_min_bytes_to_compress", 0, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, - {"http_max_chunk_size", 0, 0, "Internal limitation"}, - {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, - {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, - {"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, - {"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"}, - {"azure_max_blocks_in_multipart_upload", 50000, 50000, "Maximum number of blocks in multipart upload for Azure."}, - }}, - {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, - {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, - {"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"}, - {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"}, - {"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries against system tables"}, - {"input_format_json_ignore_unnecessary_fields", false, true, "Ignore unnecessary fields and not parse them. 
Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields"}, - {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, - {"allow_experimental_database_replicated", false, true, "Database engine Replicated is now in Beta stage"}, - {"temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds", (10 * 60 * 1000), (10 * 60 * 1000), "Wait time to lock cache for sapce reservation in temporary data in filesystem cache"}, - {"optimize_rewrite_sum_if_to_count_if", false, true, "Only available for the analyzer, where it works correctly"}, - {"azure_allow_parallel_part_upload", "true", "true", "Use multiple threads for azure multipart upload."}, - {"max_recursive_cte_evaluation_depth", DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, "Maximum limit on recursive CTE evaluation depth"}, - {"query_plan_convert_outer_join_to_inner_join", false, true, "Allow to convert OUTER JOIN to INNER JOIN if filter after JOIN always filters default values"}, - }}, - {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, - {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, - {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, - {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, - {"page_cache_inject_eviction", false, false, "Added userspace page cache"}, - {"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"}, - {"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"}, - {"traverse_shadow_remote_data_paths", false, false, "Traverse shadow directory when query system.remote_data_paths."}, - {"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication is dependent materialized view cannot work together with async inserts."}, - {"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"}, - {"log_processors_profiles", false, true, "Enable by default"}, - {"function_locate_has_mysql_compatible_argument_order", false, true, "Increase compatibility with MySQL's locate function."}, - {"allow_suspicious_primary_key", true, false, "Forbid suspicious PRIMARY KEY/ORDER BY for MergeTree (i.e. 
SimpleAggregateFunction)"}, - {"filesystem_cache_reserve_space_wait_lock_timeout_milliseconds", 1000, 1000, "Wait time to lock cache for sapce reservation in filesystem cache"}, - {"max_parser_backtracks", 0, 1000000, "Limiting the complexity of parsing"}, - {"analyzer_compatibility_join_using_top_level_identifier", false, false, "Force to resolve identifier in JOIN USING from projection"}, - {"distributed_insert_skip_read_only_replicas", false, false, "If true, INSERT into Distributed will skip read-only replicas"}, - {"keeper_max_retries", 10, 10, "Max retries for general keeper operations"}, - {"keeper_retry_initial_backoff_ms", 100, 100, "Initial backoff timeout for general keeper operations"}, - {"keeper_retry_max_backoff_ms", 5000, 5000, "Max backoff timeout for general keeper operations"}, - {"s3queue_allow_experimental_sharded_mode", false, false, "Enable experimental sharded mode of S3Queue table engine. It is experimental because it will be rewritten"}, - {"allow_experimental_analyzer", false, true, "Enable analyzer and planner by default."}, - {"merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability", 0.0, 0.0, "For testing of `PartsSplitter` - split read ranges into intersecting and non intersecting every time you read from MergeTree with the specified probability."}, - {"allow_get_client_http_header", false, false, "Introduced a new function."}, - {"output_format_pretty_row_numbers", false, true, "It is better for usability."}, - {"output_format_pretty_max_value_width_apply_for_single_value", true, false, "Single values in Pretty formats won't be cut."}, - {"output_format_parquet_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, - {"output_format_orc_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, - {"output_format_arrow_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, - {"output_format_parquet_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, - {"output_format_orc_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. 
Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, - {"output_format_pretty_highlight_digit_groups", false, true, "If enabled and if output is a terminal, highlight every digit corresponding to the number of thousands, millions, etc. with underline."}, - {"geo_distance_returns_float64_on_float64_arguments", false, true, "Increase the default precision."}, - {"azure_max_inflight_parts_for_one_file", 20, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited."}, - {"azure_strict_upload_part_size", 0, 0, "The exact size of part to upload during multipart upload to Azure blob storage."}, - {"azure_min_upload_part_size", 16*1024*1024, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage."}, - {"azure_max_upload_part_size", 5ull*1024*1024*1024, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage."}, - {"azure_upload_part_size_multiply_factor", 2, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage."}, - {"azure_upload_part_size_multiply_parts_count_threshold", 500, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor."}, - {"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."}, - {"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."}, - {"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."}, - }}, - {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, - {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, - {"output_format_values_escape_quote_with_quote", false, false, "If true escape ' with '', otherwise quoted with \\'"}, - {"output_format_pretty_single_large_number_tip_threshold", 0, 1'000'000, "Print a readable number tip on the right side of the table if the block consists of a single number which exceeds this value (except 0)"}, - {"input_format_try_infer_exponent_floats", true, false, "Don't infer floats in exponential notation by default"}, - {"query_plan_optimize_prewhere", true, true, "Allow to push down filter to PREWHERE expression for supported storages"}, - {"async_insert_max_data_size", 1000000, 10485760, "The previous value appeared to be too small."}, - {"async_insert_poll_timeout_ms", 10, 10, "Timeout in milliseconds for polling data from asynchronous insert queue"}, - {"async_insert_use_adaptive_busy_timeout", false, true, "Use adaptive asynchronous insert timeout"}, - {"async_insert_busy_timeout_min_ms", 50, 50, "The minimum value of the asynchronous insert timeout in milliseconds; it also serves as the initial value, which may be increased later by the adaptive algorithm"}, - {"async_insert_busy_timeout_max_ms", 200, 200, "The minimum value of the asynchronous insert timeout in milliseconds; async_insert_busy_timeout_ms is aliased to async_insert_busy_timeout_max_ms"}, - {"async_insert_busy_timeout_increase_rate", 0.2, 0.2, "The 
exponential growth rate at which the adaptive asynchronous insert timeout increases"}, - {"async_insert_busy_timeout_decrease_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout decreases"}, - {"format_template_row_format", "", "", "Template row format string can be set directly in query"}, - {"format_template_resultset_format", "", "", "Template result set format string can be set in query"}, - {"split_parts_ranges_into_intersecting_and_non_intersecting_final", true, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, - {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}, - {"azure_max_single_part_copy_size", 256*1024*1024, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage."}, - {"min_external_table_block_size_rows", DEFAULT_INSERT_BLOCK_SIZE, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to external table to specified size in rows, if blocks are not big enough"}, - {"min_external_table_block_size_bytes", DEFAULT_INSERT_BLOCK_SIZE * 256, DEFAULT_INSERT_BLOCK_SIZE * 256, "Squash blocks passed to external table to specified size in bytes, if blocks are not big enough."}, - {"parallel_replicas_prefer_local_join", true, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN."}, - {"optimize_time_filter_with_preimage", true, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')"}, - {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. 
Used as a safeguard against consuming too much memory."}, - {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, - {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, - {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, - {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - }}, - {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, - {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, - {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, - {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, - {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, - {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, - {"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"}, - {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, - {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, - {"enable_vertical_final", false, true, "Use vertical final by default"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, - {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, - {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, - {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, - {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, - {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, - {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, - {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, - {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL 
optimization"}, - {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, - {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, - {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, - {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, - {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, - {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, - {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, - {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, - {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, - {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, - {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, - {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, - {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, - {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, - {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, - {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, - {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, - {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, - {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. 
Rows with different values in sorting prefix are filled independently"}, - {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, - {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, - {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, - {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, - {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, - {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}}}, - {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, - {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, - {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, - {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, - {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, - {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, - {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, - {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, - {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, - {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, - {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, - {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, - {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, - {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, - {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, - {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, - {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, - {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. 
For example, sorting steps related to ORDER BY clauses in subqueries"}}}, - {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, - {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, - {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, - {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, - {"23.4", {{"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}}}, - {"23.4", {{"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, - {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, - {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, - {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, - {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, - {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, - {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, - {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, - {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, - {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, - {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, - {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, - {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, - {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. 
It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, - {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, - {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, - {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, - {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, - {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, - {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. It is significantly more convenient to use than sequential quorum inserts"}, - {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, - {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, - {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, - {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, - {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, - {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, - {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, - {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, - {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, - {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, - {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, - {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, - {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, -}; +const std::map & getSettingsChangesHistory(); } diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 05985316566..18034d846df 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -201,13 +201,13 @@ IMPLEMENT_SETTING_ENUM(ORCCompression, ErrorCodes::BAD_ARGUMENTS, {"zlib", FormatSettings::ORCCompression::ZLIB}, {"lz4", FormatSettings::ORCCompression::LZ4}}) -IMPLEMENT_SETTING_ENUM(S3QueueMode, ErrorCodes::BAD_ARGUMENTS, - {{"ordered", S3QueueMode::ORDERED}, - {"unordered", S3QueueMode::UNORDERED}}) +IMPLEMENT_SETTING_ENUM(ObjectStorageQueueMode, ErrorCodes::BAD_ARGUMENTS, + 
{{"ordered", ObjectStorageQueueMode::ORDERED}, + {"unordered", ObjectStorageQueueMode::UNORDERED}}) -IMPLEMENT_SETTING_ENUM(S3QueueAction, ErrorCodes::BAD_ARGUMENTS, - {{"keep", S3QueueAction::KEEP}, - {"delete", S3QueueAction::DELETE}}) +IMPLEMENT_SETTING_ENUM(ObjectStorageQueueAction, ErrorCodes::BAD_ARGUMENTS, + {{"keep", ObjectStorageQueueAction::KEEP}, + {"delete", ObjectStorageQueueAction::DELETE}}) IMPLEMENT_SETTING_ENUM(ExternalCommandStderrReaction, ErrorCodes::BAD_ARGUMENTS, {{"none", ExternalCommandStderrReaction::NONE}, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 575cd8700c8..2d65bfc7463 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -341,21 +341,21 @@ DECLARE_SETTING_ENUM(ParallelReplicasCustomKeyFilterType) DECLARE_SETTING_ENUM(LocalFSReadMethod) -enum class S3QueueMode : uint8_t +enum class ObjectStorageQueueMode : uint8_t { ORDERED, UNORDERED, }; -DECLARE_SETTING_ENUM(S3QueueMode) +DECLARE_SETTING_ENUM(ObjectStorageQueueMode) -enum class S3QueueAction : uint8_t +enum class ObjectStorageQueueAction : uint8_t { KEEP, DELETE, }; -DECLARE_SETTING_ENUM(S3QueueAction) +DECLARE_SETTING_ENUM(ObjectStorageQueueAction) DECLARE_SETTING_ENUM(ExternalCommandStderrReaction) diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index 75a01a6190f..c85e8f5688a 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -30,8 +30,8 @@ namespace { friend void tryVisitNestedSelect(const String & query, DDLDependencyVisitorData & data); public: - DDLDependencyVisitorData(const ContextPtr & context_, const QualifiedTableName & table_name_, const ASTPtr & ast_) - : create_query(ast_), table_name(table_name_), current_database(context_->getCurrentDatabase()), context(context_) + DDLDependencyVisitorData(const ContextPtr & global_context_, const QualifiedTableName & table_name_, const ASTPtr & ast_, const String & current_database_) + : create_query(ast_), table_name(table_name_), default_database(global_context_->getCurrentDatabase()), current_database(current_database_), global_context(global_context_) { } @@ -71,8 +71,9 @@ namespace ASTPtr create_query; std::unordered_set skip_asts; QualifiedTableName table_name; + String default_database; String current_database; - ContextPtr context; + ContextPtr global_context; TableNamesSet dependencies; /// CREATE TABLE or CREATE DICTIONARY or CREATE VIEW or CREATE TEMPORARY TABLE or CREATE DATABASE query. @@ -95,6 +96,11 @@ namespace as_table.database = current_database; dependencies.emplace(as_table); } + + /// Visit nested select query only for views, for other cases it's not + /// an actual dependency as it will be executed only once to fill the table. + if (create.select && !create.isView()) + skip_asts.insert(create.select); } /// The definition of a dictionary: SOURCE(CLICKHOUSE(...)) LAYOUT(...) LIFETIME(...) @@ -103,8 +109,8 @@ namespace if (!dictionary.source || dictionary.source->name != "clickhouse" || !dictionary.source->elements) return; - auto config = getDictionaryConfigurationFromAST(create_query->as(), context); - auto info = getInfoIfClickHouseDictionarySource(config, context); + auto config = getDictionaryConfigurationFromAST(create_query->as(), global_context); + auto info = getInfoIfClickHouseDictionarySource(config, global_context); /// We consider only dependencies on local tables. 
if (!info || !info->is_local) @@ -112,14 +118,21 @@ namespace if (!info->table_name.table.empty()) { + /// If database is not specified in dictionary source, use database of the dictionary itself, not the current/default database. if (info->table_name.database.empty()) - info->table_name.database = current_database; + info->table_name.database = table_name.database; dependencies.emplace(std::move(info->table_name)); } else { - /// We don't have a table name, we have a select query instead + /// We don't have a table name, we have a select query instead. + /// All tables from select query in dictionary definition won't + /// use current database, as this query is executed with global context. + /// Use default database from global context while visiting select query. + String current_database_ = current_database; + current_database = default_database; tryVisitNestedSelect(info->query, *this); + current_database = current_database_; } } @@ -176,7 +189,7 @@ namespace if (auto cluster_name = tryGetClusterNameFromArgument(table_engine, 0)) { - auto cluster = context->tryGetCluster(*cluster_name); + auto cluster = global_context->tryGetCluster(*cluster_name); if (cluster && cluster->getLocalShardCount()) has_local_replicas = true; } @@ -231,7 +244,7 @@ namespace { if (auto cluster_name = tryGetClusterNameFromArgument(function, 0)) { - if (auto cluster = context->tryGetCluster(*cluster_name)) + if (auto cluster = global_context->tryGetCluster(*cluster_name)) { if (cluster->getLocalShardCount()) has_local_replicas = true; @@ -303,7 +316,10 @@ namespace try { /// We're just searching for dependencies here, it's not safe to execute subqueries now. - auto evaluated = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + /// Use copy of the global_context and set current database, because expressions can contain currentDatabase() function. 
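Illustrative aside, not part of the diff: the database-resolution rule this visitor now follows can be stated in a few standalone lines. The names below are hypothetical and only mirror the idea — a table named directly in a dictionary source with no explicit database falls back to the dictionary's own database, while tables referenced from the nested SELECT fall back to the server's default database, because that query is executed with the global context.

    #include <string>

    struct QualifiedName { std::string database; std::string table; };

    // Table named directly in SOURCE(CLICKHOUSE(... table '...')):
    // an empty database means "the database of the dictionary itself".
    QualifiedName resolveSourceTable(QualifiedName ref, const std::string & dictionary_database)
    {
        if (ref.database.empty())
            ref.database = dictionary_database;
        return ref;
    }

    // Table referenced from a nested SELECT in the dictionary source:
    // that query runs with the global context, so an empty database
    // resolves to the server-wide default database instead.
    QualifiedName resolveNestedSelectTable(QualifiedName ref, const std::string & default_database)
    {
        if (ref.database.empty())
            ref.database = default_database;
        return ref;
    }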
+ ContextMutablePtr global_context_copy = Context::createCopy(global_context); + global_context_copy->setCurrentDatabase(current_database); + auto evaluated = evaluateConstantExpressionOrIdentifierAsLiteral(arg, global_context_copy); const auto * literal = evaluated->as(); if (!literal || (literal->value.getType() != Field::Types::String)) return {}; @@ -444,7 +460,7 @@ namespace ParserSelectWithUnionQuery parser; String description = fmt::format("Query for ClickHouse dictionary {}", data.table_name); String fixed_query = removeWhereConditionPlaceholder(query); - const Settings & settings = data.context->getSettingsRef(); + const Settings & settings = data.global_context->getSettingsRef(); ASTPtr select = parseQuery(parser, fixed_query, description, settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); @@ -459,12 +475,19 @@ namespace } -TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & context, const QualifiedTableName & table_name, const ASTPtr & ast) +TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & global_global_context, const QualifiedTableName & table_name, const ASTPtr & ast, const String & current_database) { - DDLDependencyVisitor::Data data{context, table_name, ast}; + DDLDependencyVisitor::Data data{global_global_context, table_name, ast, current_database}; DDLDependencyVisitor::Visitor visitor{data}; visitor.visit(ast); return std::move(data).getDependencies(); } +TableNamesSet getDependenciesFromDictionaryNestedSelectQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast, const String & select_query, const String & current_database) +{ + DDLDependencyVisitor::Data data{global_context, table_name, ast, current_database}; + tryVisitNestedSelect(select_query, data); + return std::move(data).getDependencies(); +} + } diff --git a/src/Databases/DDLDependencyVisitor.h b/src/Databases/DDLDependencyVisitor.h index 29ea6298b04..400e6b04108 100644 --- a/src/Databases/DDLDependencyVisitor.h +++ b/src/Databases/DDLDependencyVisitor.h @@ -13,6 +13,9 @@ using TableNamesSet = std::unordered_set; /// Returns a list of all tables explicitly referenced in the create query of a specified table. /// For example, a column default expression can use dictGet() and thus reference a dictionary. /// Does not validate AST, works a best-effort way. -TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & context, const QualifiedTableName & table_name, const ASTPtr & ast); +TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast, const String & current_database); + +/// Returns a list of all tables explicitly referenced in the select query specified as a dictionary source. 
+TableNamesSet getDependenciesFromDictionaryNestedSelectQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast, const String & select_query, const String & current_database); } diff --git a/src/Databases/DDLLoadingDependencyVisitor.cpp b/src/Databases/DDLLoadingDependencyVisitor.cpp index b8690125aaa..40234abb20f 100644 --- a/src/Databases/DDLLoadingDependencyVisitor.cpp +++ b/src/Databases/DDLLoadingDependencyVisitor.cpp @@ -110,19 +110,30 @@ void DDLLoadingDependencyVisitor::visit(const ASTFunctionWithKeyValueArguments & auto config = getDictionaryConfigurationFromAST(data.create_query->as(), data.global_context); auto info = getInfoIfClickHouseDictionarySource(config, data.global_context); - if (!info || !info->is_local || info->table_name.table.empty()) + if (!info || !info->is_local) return; - if (info->table_name.database.empty()) - info->table_name.database = data.default_database; - data.dependencies.emplace(std::move(info->table_name)); + if (!info->table_name.table.empty()) + { + /// If database is not specified in dictionary source, use database of the dictionary itself, not the current/default database. + if (info->table_name.database.empty()) + info->table_name.database = data.table_name.database; + data.dependencies.emplace(std::move(info->table_name)); + } + else + { + /// We don't have a table name, we have a select query instead that will be executed during dictionary loading. + /// We need to find all tables used in this select query and add them to dependencies. + auto select_query_dependencies = getDependenciesFromDictionaryNestedSelectQuery(data.global_context, data.table_name, data.create_query, info->query, data.default_database); + data.dependencies.merge(select_query_dependencies); + } } void DDLLoadingDependencyVisitor::visit(const ASTStorage & storage, Data & data) { if (storage.ttl_table) { - auto ttl_dependensies = getDependenciesFromCreateQuery(data.global_context, data.table_name, storage.ttl_table->ptr()); + auto ttl_dependensies = getDependenciesFromCreateQuery(data.global_context, data.table_name, storage.ttl_table->ptr(), data.default_database); data.dependencies.merge(ttl_dependensies); } diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index e72834eddbe..233db07cd68 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -186,6 +186,7 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Table {}.{} already exists.", backQuote(database_name), backQuote(table_name)); it->second.expiration_iterator = cache_expiration_queue.emplace(cache_expiration_queue.end(), current_time, table_name); + CurrentMetrics::add(CurrentMetrics::AttachedTable, 1); } @@ -202,6 +203,7 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta if (it->second.expiration_iterator != cache_expiration_queue.end()) cache_expiration_queue.erase(it->second.expiration_iterator); tables_cache.erase(it); + CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1); } return res; diff --git a/src/Databases/DatabaseMemory.cpp b/src/Databases/DatabaseMemory.cpp index b82cf885b4a..86bf0471b8f 100644 --- a/src/Databases/DatabaseMemory.cpp +++ b/src/Databases/DatabaseMemory.cpp @@ -154,7 +154,7 @@ void DatabaseMemory::alterTable(ContextPtr local_context, const StorageID & tabl applyMetadataChangesToCreateQuery(it->second, metadata); /// The create query of the table has been just changed, we need 
to update dependencies too. - auto ref_dependencies = getDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), it->second); + auto ref_dependencies = getDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), it->second, local_context->getCurrentDatabase()); auto loading_dependencies = getLoadingDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), it->second); DatabaseCatalog::instance().updateDependencies(table_id, ref_dependencies, loading_dependencies); } diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 10a8e06e8f0..7d4bb07e8ef 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -539,7 +539,7 @@ void DatabaseOrdinary::alterTable(ContextPtr local_context, const StorageID & ta } /// The create query of the table has been just changed, we need to update dependencies too. - auto ref_dependencies = getDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), ast); + auto ref_dependencies = getDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), ast, local_context->getCurrentDatabase()); auto loading_dependencies = getLoadingDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), ast); DatabaseCatalog::instance().updateDependencies(table_id, ref_dependencies, loading_dependencies); diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index badfedeec9b..4ca9afc49eb 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -65,6 +65,7 @@ static constexpr const char * REPLICATED_DATABASE_MARK = "DatabaseReplicated"; static constexpr const char * DROPPED_MARK = "DROPPED"; static constexpr const char * BROKEN_TABLES_SUFFIX = "_broken_tables"; static constexpr const char * BROKEN_REPLICATED_TABLES_SUFFIX = "_broken_replicated_tables"; +static constexpr const char * FIRST_REPLICA_DATABASE_NAME = "first_replica_database_name"; static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768; @@ -73,9 +74,10 @@ zkutil::ZooKeeperPtr DatabaseReplicated::getZooKeeper() const return getContext()->getZooKeeper(); } -static inline String getHostID(ContextPtr global_context, const UUID & db_uuid) +static inline String getHostID(ContextPtr global_context, const UUID & db_uuid, bool secure) { - return Cluster::Address::toString(getFQDNOrHostName(), global_context->getTCPPort()) + ':' + toString(db_uuid); + UInt16 port = secure ? 
global_context->getTCPPortSecure().value_or(DBMS_DEFAULT_SECURE_PORT) : global_context->getTCPPort(); + return Cluster::Address::toString(getFQDNOrHostName(), port) + ':' + toString(db_uuid); } static inline UInt64 getMetadataHash(const String & table_name, const String & metadata) @@ -415,8 +417,10 @@ void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(LoadingStrictnessL return; } - String host_id = getHostID(getContext(), db_uuid); - if (is_create_query || replica_host_id != host_id) + String host_id = getHostID(getContext(), db_uuid, cluster_auth_info.cluster_secure_connection); + String host_id_default = getHostID(getContext(), db_uuid, false); + + if (is_create_query || (replica_host_id != host_id && replica_host_id != host_id_default)) { throw Exception( ErrorCodes::REPLICA_ALREADY_EXISTS, @@ -424,6 +428,14 @@ void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(LoadingStrictnessL replica_name, shard_name, zookeeper_path, replica_host_id, host_id); } + /// Before 24.6 we always created host_id with insecure port, even if cluster_auth_info.cluster_secure_connection was true. + /// So not to break compatibility, we need to update host_id to secure one if cluster_auth_info.cluster_secure_connection is true. + if (host_id != host_id_default && replica_host_id == host_id_default) + { + current_zookeeper->set(replica_path, host_id, -1); + createEmptyLogEntry(current_zookeeper); + } + /// Check that replica_group_name in ZooKeeper matches the local one and change it if necessary. String zk_replica_group_name; if (!current_zookeeper->tryGet(replica_path + "/replica_group", zk_replica_group_name)) @@ -454,6 +466,13 @@ void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(LoadingStrictnessL return; } + /// If not exist, create a node with the database name for introspection. + /// Technically, the database may have different names on different replicas, but this is not a usual case and we only save the first one + auto db_name_path = fs::path(zookeeper_path) / FIRST_REPLICA_DATABASE_NAME; + auto error_code = current_zookeeper->trySet(db_name_path, getDatabaseName()); + if (error_code == Coordination::Error::ZNONODE) + current_zookeeper->tryCreate(db_name_path, getDatabaseName(), zkutil::CreateMode::Persistent); + is_readonly = false; } catch (...) @@ -550,7 +569,7 @@ void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPt "already contains some data and it does not look like Replicated database path.", zookeeper_path); /// Write host name to replica_path, it will protect from multiple replicas with the same name - auto host_id = getHostID(getContext(), db_uuid); + auto host_id = getHostID(getContext(), db_uuid, cluster_auth_info.cluster_secure_connection); for (int attempts = 10; attempts > 0; --attempts) { @@ -1146,7 +1165,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep /// And QualifiedTableName::parseFromString doesn't handle this. 
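Illustrative aside, not part of the diff: the host-ID compatibility handling above amounts to accepting either the current host ID (built with the secure port when cluster_secure_connection is enabled) or the legacy ID built with the insecure port, and rewriting the stored value in the latter case. A minimal standalone sketch of that acceptance rule, with hypothetical names:

    #include <string>

    enum class HostIdCheck { Mismatch, MatchCurrent, MatchLegacyInsecure };

    // `stored` is what the replica wrote to ZooKeeper, possibly by a pre-24.6
    // server that always used the insecure TCP port when building the ID.
    HostIdCheck checkReplicaHostId(const std::string & stored,
                                   const std::string & current,          // ID built with the configured (possibly secure) port
                                   const std::string & legacy_insecure)  // ID built with the insecure port
    {
        if (stored == current)
            return HostIdCheck::MatchCurrent;
        if (stored == legacy_insecure)
            return HostIdCheck::MatchLegacyInsecure;  // accepted, then rewritten to `current` in ZooKeeper
        return HostIdCheck::Mismatch;                 // reported as "replica already exists"
    }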
auto qualified_name = QualifiedTableName{.database = getDatabaseName(), .table = table_name}; auto query_ast = parseQueryFromMetadataInZooKeeper(table_name, create_table_query); - tables_dependencies.addDependencies(qualified_name, getDependenciesFromCreateQuery(getContext(), qualified_name, query_ast)); + tables_dependencies.addDependencies(qualified_name, getDependenciesFromCreateQuery(getContext()->getGlobalContext(), qualified_name, query_ast, getContext()->getCurrentDatabase())); } tables_dependencies.checkNoCyclicDependencies(); @@ -1371,6 +1390,13 @@ void DatabaseReplicated::drop(ContextPtr context_) } } +void DatabaseReplicated::renameDatabase(ContextPtr query_context, const String & new_name) +{ + DatabaseAtomic::renameDatabase(query_context, new_name); + auto db_name_path = fs::path(zookeeper_path) / FIRST_REPLICA_DATABASE_NAME; + getZooKeeper()->set(db_name_path, getDatabaseName()); +} + void DatabaseReplicated::stopReplication() { if (ddl_worker) diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 761d6b4b503..eab5b2ff931 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -86,6 +86,8 @@ public: std::vector tryGetAreReplicasActive(const ClusterPtr & cluster_) const; + void renameDatabase(ContextPtr query_context, const String & new_name) override; + friend struct DatabaseReplicatedTask; friend class DatabaseReplicatedDDLWorker; private: diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index fd38a31da5c..6426123bb4f 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -260,7 +260,9 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n res = it->second; tables.erase(it); res->is_detached = true; - CurrentMetrics::sub(getAttachedCounterForStorage(res), 1); + + if (res->isSystemStorage() == false) + CurrentMetrics::sub(getAttachedCounterForStorage(res), 1); auto table_id = res->getStorageID(); if (table_id.hasUUID()) @@ -301,7 +303,9 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c /// It is important to reset is_detached here since in case of RENAME in /// non-Atomic database the is_detached is set to true before RENAME. 
table->is_detached = false; - CurrentMetrics::add(getAttachedCounterForStorage(table), 1); + + if (table->isSystemStorage() == false && table_id.database_name != DatabaseCatalog::SYSTEM_DATABASE) + CurrentMetrics::add(getAttachedCounterForStorage(table), 1); } void DatabaseWithOwnTablesBase::shutdown() diff --git a/src/Databases/TablesLoader.cpp b/src/Databases/TablesLoader.cpp index 6aa13b7b759..733e5d53981 100644 --- a/src/Databases/TablesLoader.cpp +++ b/src/Databases/TablesLoader.cpp @@ -137,7 +137,7 @@ void TablesLoader::buildDependencyGraph() { for (const auto & [table_name, table_metadata] : metadata.parsed_tables) { - auto new_ref_dependencies = getDependenciesFromCreateQuery(global_context, table_name, table_metadata.ast); + auto new_ref_dependencies = getDependenciesFromCreateQuery(global_context, table_name, table_metadata.ast, global_context->getCurrentDatabase()); auto new_loading_dependencies = getLoadingDependenciesFromCreateQuery(global_context, table_name, table_metadata.ast); if (!new_ref_dependencies.empty()) diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index 9b575c65bce..f06f5ba8e17 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -355,6 +355,8 @@ public: { return delegate->getS3StorageClient(); } + + std::shared_ptr tryGetS3StorageClient() const override { return delegate->tryGetS3StorageClient(); } #endif private: diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 658acb01c74..4781839cb01 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -478,6 +478,8 @@ public: "Method getS3StorageClient() is not implemented for disk type: {}", getDataSourceDescription().toString()); } + + virtual std::shared_ptr tryGetS3StorageClient() const { return nullptr; } #endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp index bae58f0b9c6..1a5388349f8 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp @@ -181,7 +181,7 @@ std::unique_ptr getAzureBlobStorageClientWithAuth( if (config.getBool(config_prefix + ".use_workload_identity", false)) { auto workload_identity_credential = std::make_shared(); - return std::make_unique(url, workload_identity_credential); + return std::make_unique(url, workload_identity_credential, client_options); } auto managed_identity_credential = std::make_shared(); diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index e7ecf7cd515..86a035f3be7 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -127,25 +127,22 @@ bool AzureObjectStorage::exists(const StoredObject & object) const { auto client_ptr = client.get(); - /// What a shame, no Exists method... 
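Illustrative aside, not part of the diff: the hunk below replaces the prefix listing with a per-blob metadata request, treating only "not found" as a negative answer and letting every other error propagate. A minimal standalone sketch of that pattern; the client and exception types here are made up purely for illustration.

    #include <set>
    #include <stdexcept>
    #include <string>

    // Stand-in for a storage client whose metadata request throws on missing objects.
    struct ObjectNotFound : std::runtime_error { using std::runtime_error::runtime_error; };

    struct FakeBlobClient
    {
        std::set<std::string> blobs;
        void getProperties(const std::string & path) const
        {
            if (blobs.count(path) == 0)
                throw ObjectNotFound(path);   // plays the role of an HTTP 404
        }
    };

    bool exists(const FakeBlobClient & client, const std::string & path)
    {
        try
        {
            client.getProperties(path);       // cheap point lookup instead of a prefix listing
            return true;
        }
        catch (const ObjectNotFound &)
        {
            return false;                     // the one expected failure
        }
        // any other exception (auth, network, throttling) propagates to the caller
    }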
- Azure::Storage::Blobs::ListBlobsOptions options; - options.Prefix = object.remote_path; - options.PageSizeHint = 1; - - ProfileEvents::increment(ProfileEvents::AzureListObjects); + ProfileEvents::increment(ProfileEvents::AzureGetProperties); if (client_ptr->GetClickhouseOptions().IsClientForDisk) - ProfileEvents::increment(ProfileEvents::DiskAzureListObjects); + ProfileEvents::increment(ProfileEvents::DiskAzureGetProperties); - auto blobs_list_response = client_ptr->ListBlobs(options); - auto blobs_list = blobs_list_response.Blobs; - - for (const auto & blob : blobs_list) + try { - if (object.remote_path == blob.Name) - return true; + auto blob_client = client_ptr->GetBlobClient(object.remote_path); + blob_client.GetProperties(); + return true; + } + catch (const Azure::Storage::StorageException & e) + { + if (e.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) + return false; + throw; } - - return false; } ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const @@ -160,7 +157,9 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith { auto client_ptr = client.get(); - /// What a shame, no Exists method... + /// NOTE: list doesn't work if endpoint contains non-empty prefix for blobs. + /// See AzureBlobStorageEndpoint and processAzureBlobStorageEndpoint for details. + Azure::Storage::Blobs::ListBlobsOptions options; options.Prefix = path; if (max_keys) diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 6a5a75c08f0..727dbeed853 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -138,6 +138,11 @@ public: { return object_storage->getS3StorageClient(); } + + std::shared_ptr tryGetS3StorageClient() override + { + return object_storage->tryGetS3StorageClient(); + } #endif private: diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 5803a985000..4de6d78e952 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -587,6 +587,11 @@ std::shared_ptr DiskObjectStorage::getS3StorageClient() const { return object_storage->getS3StorageClient(); } + +std::shared_ptr DiskObjectStorage::tryGetS3StorageClient() const +{ + return object_storage->tryGetS3StorageClient(); +} #endif DiskPtr DiskObjectStorageReservation::getDisk(size_t i) const diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index ffef0a007da..59cc82d8c81 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -214,6 +214,7 @@ public: #if USE_AWS_S3 std::shared_ptr getS3StorageClient() const override; + std::shared_ptr tryGetS3StorageClient() const override; #endif private: diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 7bc9e4073db..9f5c14fdb7c 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -127,8 +127,10 @@ public: /// /, /a, /a/b, /a/b/c, /a/b/c/d while exists will return true only for /a/b/c/d virtual bool existsOrHasAnyChild(const std::string & path) const; + /// List objects recursively by certain prefix. 
virtual void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const; + /// List objects recursively by certain prefix. Use it instead of listObjects, if you want to list objects lazily. virtual ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const; /// Get object metadata if supported. It should be possible to receive @@ -269,6 +271,7 @@ public: { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "This function is only implemented for S3ObjectStorage"); } + virtual std::shared_ptr tryGetS3StorageClient() { return nullptr; } #endif diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 63e11dcd8c8..0f7024196ea 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -634,6 +634,10 @@ std::shared_ptr S3ObjectStorage::getS3StorageClient() return client.get(); } +std::shared_ptr S3ObjectStorage::tryGetS3StorageClient() +{ + return client.get(); +} } #endif diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 7446a1f6fc8..4170cea22a0 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -169,6 +169,7 @@ public: bool isReadOnly() const override { return s3_settings.get()->read_only; } std::shared_ptr getS3StorageClient() override; + std::shared_ptr tryGetS3StorageClient() override; private: void setNewSettings(std::unique_ptr && s3_settings_); diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index aa9600875db..79c2e6b4890 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -149,6 +149,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.json.try_infer_objects_as_tuples = settings.input_format_json_try_infer_named_tuples_from_objects; format_settings.json.throw_on_bad_escape_sequence = settings.input_format_json_throw_on_bad_escape_sequence; format_settings.json.ignore_unnecessary_fields = settings.input_format_json_ignore_unnecessary_fields; + format_settings.json.ignore_key_case = settings.input_format_json_ignore_key_case; format_settings.null_as_default = settings.input_format_null_as_default; format_settings.force_null_for_omitted_fields = settings.input_format_force_null_for_omitted_fields; format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; @@ -171,6 +172,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.parquet.parallel_encoding = settings.output_format_parquet_parallel_encoding; format_settings.parquet.data_page_size = settings.output_format_parquet_data_page_size; format_settings.parquet.write_batch_size = settings.output_format_parquet_batch_size; + format_settings.parquet.write_page_index = settings.output_format_parquet_write_page_index; format_settings.parquet.local_read_min_bytes_for_seek = settings.input_format_parquet_local_file_min_bytes_for_seek; format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? 
FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8; format_settings.pretty.color = settings.output_format_pretty_color; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 18e7df8f24e..8ac783a1d86 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -228,6 +228,7 @@ struct FormatSettings bool infer_incomplete_types_as_strings = true; bool throw_on_bad_escape_sequence = true; bool ignore_unnecessary_fields = true; + bool ignore_key_case = false; } json{}; struct @@ -275,6 +276,7 @@ struct FormatSettings bool output_compliant_nested_types = true; size_t data_page_size = 1024 * 1024; size_t write_batch_size = 1024; + bool write_page_index = false; size_t local_read_min_bytes_for_seek = 8192; } parquet{}; diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index c52b00150ec..7c90f83569a 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -3,35 +3,9 @@ add_subdirectory(divide) include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_functions .) -set(DBMS_FUNCTIONS - IFunction.cpp - FunctionFactory.cpp - FunctionHelpers.cpp - extractTimeZoneFromFunctionArguments.cpp - FunctionsLogical.cpp - if.cpp - multiIf.cpp - multiMatchAny.cpp - checkHyperscanRegexp.cpp - array/has.cpp - CastOverloadResolver.cpp - # Provides dependency for cast - createFunctionBaseCast() - FunctionsConversion.cpp -) -extract_into_parent_list(clickhouse_functions_sources dbms_sources ${DBMS_FUNCTIONS}) -extract_into_parent_list(clickhouse_functions_headers dbms_headers - IFunction.h - FunctionFactory.h - FunctionHelpers.h - extractTimeZoneFromFunctionArguments.h - FunctionsLogical.h - CastOverloadResolver.h -) - add_library(clickhouse_functions_obj OBJECT ${clickhouse_functions_headers} ${clickhouse_functions_sources}) if (OMIT_HEAVY_DEBUG_SYMBOLS) target_compile_options(clickhouse_functions_obj PRIVATE "-g0") - set_source_files_properties(${DBMS_FUNCTIONS} DIRECTORY .. PROPERTIES COMPILE_FLAGS "-g0") endif() list (APPEND OBJECT_LIBS $) diff --git a/src/Functions/FunctionBase64Conversion.h b/src/Functions/FunctionBase64Conversion.h index 05914be3837..083179c3ca8 100644 --- a/src/Functions/FunctionBase64Conversion.h +++ b/src/Functions/FunctionBase64Conversion.h @@ -25,10 +25,10 @@ namespace ErrorCodes enum class Base64Variant : uint8_t { Normal, - Url + URL }; -inline std::string preprocessBase64Url(std::string_view src) +inline std::string preprocessBase64URL(std::string_view src) { std::string padded_src; padded_src.reserve(src.size() + 3); @@ -70,7 +70,7 @@ inline std::string preprocessBase64Url(std::string_view src) return padded_src; } -inline size_t postprocessBase64Url(UInt8 * dst, size_t out_len) +inline size_t postprocessBase64URL(UInt8 * dst, size_t out_len) { // Do symbol substitution as described in https://datatracker.ietf.org/doc/html/rfc4648#section-5 for (size_t i = 0; i < out_len; ++i) @@ -95,7 +95,7 @@ inline size_t postprocessBase64Url(UInt8 * dst, size_t out_len) template struct Base64Encode { - static constexpr auto name = (variant == Base64Variant::Normal) ? "base64Encode" : "base64UrlEncode"; + static constexpr auto name = (variant == Base64Variant::Normal) ? 
"base64Encode" : "base64URLEncode"; static size_t getBufferSize(size_t string_length, size_t string_count) { @@ -111,8 +111,8 @@ struct Base64Encode /// Memory sanitizer doesn't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle. __msan_unpoison(dst, outlen); - if constexpr (variant == Base64Variant::Url) - outlen = postprocessBase64Url(dst, outlen); + if constexpr (variant == Base64Variant::URL) + outlen = postprocessBase64URL(dst, outlen); return outlen; } @@ -121,7 +121,7 @@ struct Base64Encode template struct Base64Decode { - static constexpr auto name = (variant == Base64Variant::Normal) ? "base64Decode" : "base64UrlDecode"; + static constexpr auto name = (variant == Base64Variant::Normal) ? "base64Decode" : "base64URLDecode"; static size_t getBufferSize(size_t string_length, size_t string_count) { @@ -132,9 +132,9 @@ struct Base64Decode { int rc; size_t outlen = 0; - if constexpr (variant == Base64Variant::Url) + if constexpr (variant == Base64Variant::URL) { - std::string src_padded = preprocessBase64Url(src); + std::string src_padded = preprocessBase64URL(src); rc = base64_decode(src_padded.data(), src_padded.size(), reinterpret_cast(dst), &outlen, 0); } else @@ -156,7 +156,7 @@ struct Base64Decode template struct TryBase64Decode { - static constexpr auto name = (variant == Base64Variant::Normal) ? "tryBase64Decode" : "tryBase64UrlDecode"; + static constexpr auto name = (variant == Base64Variant::Normal) ? "tryBase64Decode" : "tryBase64URLDecode"; static size_t getBufferSize(size_t string_length, size_t string_count) { @@ -167,9 +167,9 @@ struct TryBase64Decode { int rc; size_t outlen = 0; - if constexpr (variant == Base64Variant::Url) + if constexpr (variant == Base64Variant::URL) { - std::string src_padded = preprocessBase64Url(src); + std::string src_padded = preprocessBase64URL(src); rc = base64_decode(src_padded.data(), src_padded.size(), reinterpret_cast(dst), &outlen, 0); } else diff --git a/src/Functions/FunctionsStringDistance.cpp b/src/Functions/FunctionsStringDistance.cpp index 6cb23bbea9f..48f4aaf4e09 100644 --- a/src/Functions/FunctionsStringDistance.cpp +++ b/src/Functions/FunctionsStringDistance.cpp @@ -113,6 +113,36 @@ struct ByteHammingDistanceImpl } }; +void parseUTF8String(const char * __restrict data, size_t size, std::function utf8_consumer, std::function ascii_consumer = nullptr) +{ + const char * end = data + size; + while (data < end) + { + size_t len = UTF8::seqLength(*data); + if (len == 1) + { + if (ascii_consumer) + ascii_consumer(static_cast(*data)); + else + utf8_consumer(static_cast(*data)); + ++data; + } + else + { + auto code_point = UTF8::convertUTF8ToCodePoint(data, end - data); + if (code_point.has_value()) + { + utf8_consumer(code_point.value()); + data += len; + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal UTF-8 sequence, while processing '{}'", StringRef(data, end - data)); + } + } + } +} + template struct ByteJaccardIndexImpl { @@ -138,57 +168,28 @@ struct ByteJaccardIndexImpl haystack_set.fill(0); needle_set.fill(0); - while (haystack < haystack_end) + if constexpr (is_utf8) { - size_t len = 1; - if constexpr (is_utf8) - len = UTF8::seqLength(*haystack); - - if (len == 1) + parseUTF8String( + haystack, + haystack_size, + [&](UInt32 data) { haystack_utf8_set.insert(data); }, + [&](unsigned char data) { haystack_set[data] = 1; }); + parseUTF8String( + needle, needle_size, [&](UInt32 data) { needle_utf8_set.insert(data); }, [&](unsigned char data) { needle_set[data] = 
1; }); + } + else + { + while (haystack < haystack_end) { haystack_set[static_cast(*haystack)] = 1; ++haystack; } - else - { - auto code_point = UTF8::convertUTF8ToCodePoint(haystack, haystack_end - haystack); - if (code_point.has_value()) - { - haystack_utf8_set.insert(code_point.value()); - haystack += len; - } - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal UTF-8 sequence, while processing '{}'", StringRef(haystack, haystack_end - haystack)); - } - } - } - - while (needle < needle_end) - { - - size_t len = 1; - if constexpr (is_utf8) - len = UTF8::seqLength(*needle); - - if (len == 1) + while (needle < needle_end) { needle_set[static_cast(*needle)] = 1; ++needle; } - else - { - auto code_point = UTF8::convertUTF8ToCodePoint(needle, needle_end - needle); - if (code_point.has_value()) - { - needle_utf8_set.insert(code_point.value()); - needle += len; - } - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal UTF-8 sequence, while processing '{}'", StringRef(needle, needle_end - needle)); - } - } } UInt8 intersection = 0; @@ -226,6 +227,7 @@ struct ByteJaccardIndexImpl static constexpr size_t max_string_size = 1u << 16; +template struct ByteEditDistanceImpl { using ResultType = UInt64; @@ -242,6 +244,16 @@ struct ByteEditDistanceImpl ErrorCodes::TOO_LARGE_STRING_SIZE, "The string size is too big for function editDistance, should be at most {}", max_string_size); + PaddedPODArray haystack_utf8; + PaddedPODArray needle_utf8; + if constexpr (is_utf8) + { + parseUTF8String(haystack, haystack_size, [&](UInt32 data) { haystack_utf8.push_back(data); }); + parseUTF8String(needle, needle_size, [&](UInt32 data) { needle_utf8.push_back(data); }); + haystack_size = haystack_utf8.size(); + needle_size = needle_utf8.size(); + } + PaddedPODArray distances0(haystack_size + 1, 0); PaddedPODArray distances1(haystack_size + 1, 0); @@ -261,9 +273,16 @@ struct ByteEditDistanceImpl insertion = distances1[pos_haystack] + 1; substitution = distances0[pos_haystack]; - if (*(needle + pos_needle) != *(haystack + pos_haystack)) - substitution += 1; - + if constexpr (is_utf8) + { + if (needle_utf8[pos_needle] != haystack_utf8[pos_haystack]) + substitution += 1; + } + else + { + if (*(needle + pos_needle) != *(haystack + pos_haystack)) + substitution += 1; + } distances1[pos_haystack + 1] = std::min(deletion, std::min(substitution, insertion)); } distances0.swap(distances1); @@ -457,7 +476,12 @@ struct NameEditDistance { static constexpr auto name = "editDistance"; }; -using FunctionEditDistance = FunctionsStringSimilarity, NameEditDistance>; +using FunctionEditDistance = FunctionsStringSimilarity>, NameEditDistance>; +struct NameEditDistanceUTF8 +{ + static constexpr auto name = "editDistanceUTF8"; +}; +using FunctionEditDistanceUTF8 = FunctionsStringSimilarity>, NameEditDistanceUTF8>; struct NameDamerauLevenshteinDistance { @@ -499,6 +523,10 @@ REGISTER_FUNCTION(StringDistance) FunctionDocumentation{.description = R"(Calculates the edit distance between two byte-strings.)"}); factory.registerAlias("levenshteinDistance", NameEditDistance::name); + factory.registerFunction( + FunctionDocumentation{.description = R"(Calculates the edit distance between two UTF8 strings.)"}); + factory.registerAlias("levenshteinDistanceUTF8", NameEditDistanceUTF8::name); + factory.registerFunction( FunctionDocumentation{.description = R"(Calculates the Damerau-Levenshtein distance two between two byte-string.)"}); diff --git a/src/Functions/array/arrayAggregation.cpp b/src/Functions/array/arrayAggregation.cpp 
index 03aa5fb9086..adb1bb707d8 100644 --- a/src/Functions/array/arrayAggregation.cpp +++ b/src/Functions/array/arrayAggregation.cpp @@ -1,5 +1,7 @@ #include +#include +#include #include #include #include @@ -102,6 +104,11 @@ struct ArrayAggregateImpl static DataTypePtr getReturnType(const DataTypePtr & expression_return, const DataTypePtr & /*array_element*/) { + if (aggregate_operation == AggregateOperation::max || aggregate_operation == AggregateOperation::min) + { + return expression_return; + } + DataTypePtr result; auto call = [&](const auto & types) @@ -133,31 +140,6 @@ struct ArrayAggregateImpl return true; } } - else if constexpr (aggregate_operation == AggregateOperation::max || aggregate_operation == AggregateOperation::min) - { - if constexpr (IsDataTypeDate) - { - result = std::make_shared(); - - return true; - } - else if constexpr (!IsDataTypeDecimal) - { - std::string timezone = getDateTimeTimezone(*expression_return); - result = std::make_shared(timezone); - - return true; - } - else - { - std::string timezone = getDateTimeTimezone(*expression_return); - UInt32 scale = getDecimalScale(*expression_return); - result = std::make_shared(scale, timezone); - - return true; - } - } - return false; }; @@ -378,6 +360,47 @@ struct ArrayAggregateImpl static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped) { + if constexpr (aggregate_operation == AggregateOperation::max || aggregate_operation == AggregateOperation::min) + { + MutableColumnPtr res; + const auto & column = array.getDataPtr(); + const ColumnConst * const_column = checkAndGetColumn(&*column); + if (const_column) + { + res = const_column->getDataColumn().cloneEmpty(); + } + else + { + res = column->cloneEmpty(); + } + const IColumn::Offsets & offsets = array.getOffsets(); + size_t pos = 0; + for (const auto & offset : offsets) + { + if (offset == pos) + { + res->insertDefault(); + continue; + } + size_t current_max_or_min_index = pos; + ++pos; + for (; pos < offset; ++pos) + { + int compare_result = column->compareAt(pos, current_max_or_min_index, *column, 1); + if (aggregate_operation == AggregateOperation::max && compare_result > 0) + { + current_max_or_min_index = pos; + } + else if (aggregate_operation == AggregateOperation::min && compare_result < 0) + { + current_max_or_min_index = pos; + } + } + res->insert((*column)[current_max_or_min_index]); + } + return res; + } + const IColumn::Offsets & offsets = array.getOffsets(); ColumnPtr res; diff --git a/src/Functions/base64UrlDecode.cpp b/src/Functions/base64URLDecode.cpp similarity index 73% rename from src/Functions/base64UrlDecode.cpp rename to src/Functions/base64URLDecode.cpp index 59975d8f9d1..f5766dc60bd 100644 --- a/src/Functions/base64UrlDecode.cpp +++ b/src/Functions/base64URLDecode.cpp @@ -5,16 +5,16 @@ namespace DB { -REGISTER_FUNCTION(Base64UrlDecode) +REGISTER_FUNCTION(Base64URLDecode) { FunctionDocumentation::Description description = R"(Accepts a base64-encoded URL and decodes it from base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)"; - FunctionDocumentation::Syntax syntax = "base64UrlDecode(encodedUrl)"; - FunctionDocumentation::Arguments arguments = {{"encodedUrl", "String column or constant. If the string is not a valid Base64-encoded value, an exception is thrown."}}; + FunctionDocumentation::Syntax syntax = "base64URLDecode(encodedURL)"; + FunctionDocumentation::Arguments arguments = {{"encodedURL", "String column or constant. 
If the string is not a valid Base64-encoded value, an exception is thrown."}}; FunctionDocumentation::ReturnedValue returned_value = "A string containing the decoded value of the argument."; - FunctionDocumentation::Examples examples = {{"Example", "SELECT base64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t')", "https://clickhouse.com"}}; + FunctionDocumentation::Examples examples = {{"Example", "SELECT base64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t')", "https://clickhouse.com"}}; FunctionDocumentation::Categories categories = {"String encoding"}; - factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); + factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); } } diff --git a/src/Functions/base64UrlEncode.cpp b/src/Functions/base64URLEncode.cpp similarity index 78% rename from src/Functions/base64UrlEncode.cpp rename to src/Functions/base64URLEncode.cpp index 05d50170c14..73a465a30c5 100644 --- a/src/Functions/base64UrlEncode.cpp +++ b/src/Functions/base64URLEncode.cpp @@ -5,16 +5,16 @@ namespace DB { -REGISTER_FUNCTION(Base64UrlEncode) +REGISTER_FUNCTION(Base64URLEncode) { FunctionDocumentation::Description description = R"(Encodes an URL (String or FixedString) as base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)"; - FunctionDocumentation::Syntax syntax = "base64UrlEncode(url)"; + FunctionDocumentation::Syntax syntax = "base64URLEncode(url)"; FunctionDocumentation::Arguments arguments = {{"url", "String column or constant."}}; FunctionDocumentation::ReturnedValue returned_value = "A string containing the encoded value of the argument."; - FunctionDocumentation::Examples examples = {{"Example", "SELECT base64UrlEncode('https://clickhouse.com')", "aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ"}}; + FunctionDocumentation::Examples examples = {{"Example", "SELECT base64URLEncode('https://clickhouse.com')", "aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ"}}; FunctionDocumentation::Categories categories = {"String encoding"}; - factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); + factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); } } diff --git a/src/Functions/dateTimeToSnowflakeID.cpp b/src/Functions/dateTimeToSnowflakeID.cpp new file mode 100644 index 00000000000..968a7628ca5 --- /dev/null +++ b/src/Functions/dateTimeToSnowflakeID.cpp @@ -0,0 +1,158 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace +{ + +/// See generateSnowflakeID.cpp +constexpr size_t time_shift = 22; + +} + +class FunctionDateTimeToSnowflakeID : public IFunction +{ +public: + static constexpr auto name = "dateTimeToSnowflakeID"; + + static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared(); } + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } + bool isVariadic() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors args{ + {"value", static_cast(&isDateTime), nullptr, "DateTime"} + }; + FunctionArgumentDescriptors 
optional_args{ + {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"} + }; + validateFunctionArgumentTypes(*this, arguments, args, optional_args); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto & col_src = *arguments[0].column; + + UInt64 epoch = 0; + if (arguments.size() == 2 && input_rows_count != 0) + { + const auto & col_epoch = *arguments[1].column; + epoch = col_epoch.getUInt(0); + } + + auto col_res = ColumnUInt64::create(input_rows_count); + auto & res_data = col_res->getData(); + + const auto & src_data = typeid_cast(col_src).getData(); + for (size_t i = 0; i < input_rows_count; ++i) + res_data[i] = (static_cast(src_data[i]) * 1000 - epoch) << time_shift; + return col_res; + } +}; + + +class FunctionDateTime64ToSnowflakeID : public IFunction +{ +public: + static constexpr auto name = "dateTime64ToSnowflakeID"; + + static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared(); } + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } + bool isVariadic() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors args{ + {"value", static_cast(&isDateTime64), nullptr, "DateTime64"} + }; + FunctionArgumentDescriptors optional_args{ + {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"} + }; + validateFunctionArgumentTypes(*this, arguments, args, optional_args); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto & col_src = *arguments[0].column; + const auto & src_data = typeid_cast(col_src).getData(); + + UInt64 epoch = 0; + if (arguments.size() == 2 && input_rows_count != 0) + { + const auto & col_epoch = *arguments[1].column; + epoch = col_epoch.getUInt(0); + } + + auto col_res = ColumnUInt64::create(input_rows_count); + auto & res_data = col_res->getData(); + + /// timestamps in snowflake-ids are millisecond-based, convert input to milliseconds + UInt32 src_scale = getDecimalScale(*arguments[0].type); + Int64 multiplier_msec = DecimalUtils::scaleMultiplier(3); + Int64 multiplier_src = DecimalUtils::scaleMultiplier(src_scale); + auto factor = multiplier_msec / static_cast(multiplier_src); + + for (size_t i = 0; i < input_rows_count; ++i) + res_data[i] = std::llround(src_data[i] * factor - epoch) << time_shift; + + return col_res; + } +}; + +REGISTER_FUNCTION(DateTimeToSnowflakeID) +{ + { + FunctionDocumentation::Description description = R"(Converts a [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time.)"; + FunctionDocumentation::Syntax syntax = "dateTimeToSnowflakeID(value[, epoch])"; + FunctionDocumentation::Arguments arguments = { + {"value", "Date with time. [DateTime](../data-types/datetime.md)."}, + {"epoch", "Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2015-01-01), provide 1288834974657. Optional. 
[UInt*](../data-types/int-uint.md)"} + }; + FunctionDocumentation::ReturnedValue returned_value = "Input value converted to [UInt64](../data-types/int-uint.md) as the first Snowflake ID at that time."; + FunctionDocumentation::Examples examples = {{"simple", "SELECT dateTimeToSnowflakeID(toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai'))", "6832626392367104000"}}; + FunctionDocumentation::Categories categories = {"Snowflake ID"}; + + factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); + } + + { + FunctionDocumentation::Description description = R"(Converts a [DateTime64](../data-types/datetime64.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the given time.)"; + FunctionDocumentation::Syntax syntax = "dateTime64ToSnowflakeID(value[, epoch])"; + FunctionDocumentation::Arguments arguments = { + {"value", "Date with time. [DateTime64](../data-types/datetime64.md)."}, + {"epoch", "Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2015-01-01), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md)"} + }; + FunctionDocumentation::ReturnedValue returned_value = "Input value converted to [UInt64](../data-types/int-uint.md) as the first Snowflake ID at that time."; + FunctionDocumentation::Examples examples = {{"simple", "SELECT dateTime64ToSnowflakeID(toDateTime64('2021-08-15 18:57:56', 3, 'Asia/Shanghai'))", "6832626394434895872"}}; + FunctionDocumentation::Categories categories = {"Snowflake ID"}; + + factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); + } +} + +} diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index f1e47ea1158..8ac010deafc 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -207,7 +207,7 @@ public: REGISTER_FUNCTION(GenerateSnowflakeID) { - FunctionDocumentation::Description description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function generateSnowflakeID guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; + FunctionDocumentation::Description description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds (41 + 1 top zero bits), followed by a machine id (10 bits), and a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0.
Function generateSnowflakeID guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; FunctionDocumentation::Syntax syntax = "generateSnowflakeID([expression])"; FunctionDocumentation::Arguments arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}}; FunctionDocumentation::ReturnedValue returned_value = "A value of type UInt64"; diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp index 11e210d2cc2..162b8c58873 100644 --- a/src/Functions/parseDateTime.cpp +++ b/src/Functions/parseDateTime.cpp @@ -978,8 +978,7 @@ namespace [[nodiscard]] static PosOrError mysqlAmericanDate(Pos cur, Pos end, const String & fragment, DateTime & date) { - if (auto status = checkSpace(cur, end, 8, "mysqlAmericanDate requires size >= 8", fragment)) - return tl::unexpected(status.error()); + RETURN_ERROR_IF_FAILED(checkSpace(cur, end, 8, "mysqlAmericanDate requires size >= 8", fragment)) Int32 month; ASSIGN_RESULT_OR_RETURN_ERROR(cur, (readNumber2(cur, end, fragment, month))) @@ -993,7 +992,7 @@ namespace Int32 year; ASSIGN_RESULT_OR_RETURN_ERROR(cur, (readNumber2(cur, end, fragment, year))) - RETURN_ERROR_IF_FAILED(date.setYear(year)) + RETURN_ERROR_IF_FAILED(date.setYear(year + 2000)) return cur; } @@ -1015,8 +1014,7 @@ namespace [[nodiscard]] static PosOrError mysqlISO8601Date(Pos cur, Pos end, const String & fragment, DateTime & date) { - if (auto status = checkSpace(cur, end, 10, "mysqlISO8601Date requires size >= 10", fragment)) - return tl::unexpected(status.error()); + RETURN_ERROR_IF_FAILED(checkSpace(cur, end, 10, "mysqlISO8601Date requires size >= 10", fragment)) Int32 year; Int32 month; @@ -1462,8 +1460,7 @@ namespace [[nodiscard]] static PosOrError jodaDayOfWeekText(size_t /*min_represent_digits*/, Pos cur, Pos end, const String & fragment, DateTime & date) { - if (auto result= checkSpace(cur, end, 3, "jodaDayOfWeekText requires size >= 3", fragment); !result.has_value()) - return tl::unexpected(result.error()); + RETURN_ERROR_IF_FAILED(checkSpace(cur, end, 3, "jodaDayOfWeekText requires size >= 3", fragment)) String text1(cur, 3); boost::to_lower(text1); @@ -1556,8 +1553,8 @@ namespace Int32 day_of_month; ASSIGN_RESULT_OR_RETURN_ERROR(cur, (readNumberWithVariableLength( cur, end, false, false, false, repetitions, std::max(repetitions, 2uz), fragment, day_of_month))) - if (auto res = date.setDayOfMonth(day_of_month); !res.has_value()) - return tl::unexpected(res.error()); + RETURN_ERROR_IF_FAILED(date.setDayOfMonth(day_of_month)) + return cur; } diff --git a/src/Functions/snowflake.cpp b/src/Functions/snowflake.cpp index 4a2d502a31a..5ff8a636058 100644 --- a/src/Functions/snowflake.cpp +++ b/src/Functions/snowflake.cpp @@ -11,11 +11,17 @@ #include +/// ------------------------------------------------------------------------------------------------------------------------------ +/// The functions in this file are deprecated and should be removed in favor of functions 'snowflakeIDToDateTime[64]' and +/// 'dateTime[64]ToSnowflakeID' by summer 2025. Please also mark setting `allow_deprecated_snowflake_conversion_functions` as obsolete then. 
+/// ------------------------------------------------------------------------------------------------------------------------------ + namespace DB { namespace ErrorCodes { + extern const int DEPRECATED_FUNCTION; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } @@ -34,10 +40,19 @@ constexpr int time_shift = 22; class FunctionDateTimeToSnowflake : public IFunction { private: - const char * name; + const bool allow_deprecated_snowflake_conversion_functions; public: - explicit FunctionDateTimeToSnowflake(const char * name_) : name(name_) { } + static constexpr auto name = "dateTimeToSnowflake"; + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context); + } + + explicit FunctionDateTimeToSnowflake(ContextPtr context) + : allow_deprecated_snowflake_conversion_functions(context->getSettingsRef().allow_deprecated_snowflake_conversion_functions) + {} String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } @@ -56,6 +71,9 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (!allow_deprecated_snowflake_conversion_functions) + throw Exception(ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated, to enable it set setting 'allow_deprecated_snowflake_conversion_functions' to 'true'", getName()); + const auto & src = arguments[0]; const auto & src_column = *src.column; @@ -73,13 +91,20 @@ public: class FunctionSnowflakeToDateTime : public IFunction { private: - const char * name; const bool allow_nonconst_timezone_arguments; + const bool allow_deprecated_snowflake_conversion_functions; public: - explicit FunctionSnowflakeToDateTime(const char * name_, ContextPtr context) - : name(name_) - , allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + static constexpr auto name = "snowflakeToDateTime"; + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context); + } + + explicit FunctionSnowflakeToDateTime(ContextPtr context) + : allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) + , allow_deprecated_snowflake_conversion_functions(context->getSettingsRef().allow_deprecated_snowflake_conversion_functions) {} String getName() const override { return name; } @@ -107,6 +132,9 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (!allow_deprecated_snowflake_conversion_functions) + throw Exception(ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated, to enable it set setting 'allow_deprecated_snowflake_conversion_functions' to 'true'", getName()); + const auto & src = arguments[0]; const auto & src_column = *src.column; @@ -138,10 +166,19 @@ public: class FunctionDateTime64ToSnowflake : public IFunction { private: - const char * name; + const bool allow_deprecated_snowflake_conversion_functions; public: - explicit FunctionDateTime64ToSnowflake(const char * name_) : name(name_) { } + static constexpr auto name = "dateTime64ToSnowflake"; + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context); + } + + explicit FunctionDateTime64ToSnowflake(ContextPtr context) + : allow_deprecated_snowflake_conversion_functions(context->getSettingsRef().allow_deprecated_snowflake_conversion_functions) + {} String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } @@ -160,6 
+197,9 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (!allow_deprecated_snowflake_conversion_functions) + throw Exception(ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated, to enable it set setting 'allow_deprecated_snowflake_conversion_functions' to true", getName()); + const auto & src = arguments[0]; const auto & src_column = *src.column; @@ -185,13 +225,20 @@ public: class FunctionSnowflakeToDateTime64 : public IFunction { private: - const char * name; const bool allow_nonconst_timezone_arguments; + const bool allow_deprecated_snowflake_conversion_functions; public: - explicit FunctionSnowflakeToDateTime64(const char * name_, ContextPtr context) - : name(name_) - , allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + static constexpr auto name = "snowflakeToDateTime64"; + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context); + } + + explicit FunctionSnowflakeToDateTime64(ContextPtr context) + : allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) + , allow_deprecated_snowflake_conversion_functions(context->getSettingsRef().allow_deprecated_snowflake_conversion_functions) {} String getName() const override { return name; } @@ -219,6 +266,9 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (!allow_deprecated_snowflake_conversion_functions) + throw Exception(ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated, to enable it set setting 'allow_deprecated_snowflake_conversion_functions' to true", getName()); + const auto & src = arguments[0]; const auto & src_column = *src.column; @@ -246,27 +296,12 @@ public: } -REGISTER_FUNCTION(DateTimeToSnowflake) +REGISTER_FUNCTION(LegacySnowflakeConversion) { - factory.registerFunction("dateTimeToSnowflake", - [](ContextPtr){ return std::make_shared("dateTimeToSnowflake"); }); -} - -REGISTER_FUNCTION(DateTime64ToSnowflake) -{ - factory.registerFunction("dateTime64ToSnowflake", - [](ContextPtr){ return std::make_shared("dateTime64ToSnowflake"); }); -} - -REGISTER_FUNCTION(SnowflakeToDateTime) -{ - factory.registerFunction("snowflakeToDateTime", - [](ContextPtr context){ return std::make_shared("snowflakeToDateTime", context); }); -} -REGISTER_FUNCTION(SnowflakeToDateTime64) -{ - factory.registerFunction("snowflakeToDateTime64", - [](ContextPtr context){ return std::make_shared("snowflakeToDateTime64", context); }); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); } } diff --git a/src/Functions/snowflakeIDToDateTime.cpp b/src/Functions/snowflakeIDToDateTime.cpp new file mode 100644 index 00000000000..b799792a56f --- /dev/null +++ b/src/Functions/snowflakeIDToDateTime.cpp @@ -0,0 +1,206 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + +/// See generateSnowflakeID.cpp +constexpr size_t time_shift = 22; + +} + +class FunctionSnowflakeIDToDateTime : public IFunction +{ +private: + const bool allow_nonconst_timezone_arguments; + +public: + static constexpr auto name = "snowflakeIDToDateTime"; + + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + 
explicit FunctionSnowflakeIDToDateTime(ContextPtr context) + : allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + {} + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } + bool isVariadic() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors args{ + {"value", static_cast(&isUInt64), nullptr, "UInt64"} + }; + FunctionArgumentDescriptors optional_args{ + {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"}, + {"time_zone", static_cast(&isString), nullptr, "String"} + }; + validateFunctionArgumentTypes(*this, arguments, args, optional_args); + + String timezone; + if (arguments.size() == 3) + timezone = extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, allow_nonconst_timezone_arguments); + + return std::make_shared(timezone); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto & col_src = *arguments[0].column; + + UInt64 epoch = 0; + if (arguments.size() >= 2 && input_rows_count != 0) + { + const auto & col_epoch = *arguments[1].column; + epoch = col_epoch.getUInt(0); + } + + auto col_res = ColumnDateTime::create(input_rows_count); + auto & res_data = col_res->getData(); + + if (const auto * col_src_non_const = typeid_cast(&col_src)) + { + const auto & src_data = col_src_non_const->getData(); + for (size_t i = 0; i < input_rows_count; ++i) + res_data[i] = static_cast(((src_data[i] >> time_shift) + epoch) / 1000); + } + else if (const auto * col_src_const = typeid_cast(&col_src)) + { + UInt64 src_val = col_src_const->getValue(); + for (size_t i = 0; i < input_rows_count; ++i) + res_data[i] = static_cast(((src_val >> time_shift) + epoch) / 1000); + } + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal argument for function {}", name); + + return col_res; + } +}; + + +class FunctionSnowflakeIDToDateTime64 : public IFunction +{ +private: + const bool allow_nonconst_timezone_arguments; + +public: + static constexpr auto name = "snowflakeIDToDateTime64"; + + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + explicit FunctionSnowflakeIDToDateTime64(ContextPtr context) + : allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + {} + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } + bool isVariadic() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors args{ + {"value", static_cast(&isUInt64), nullptr, "UInt64"} + }; + FunctionArgumentDescriptors optional_args{ + {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"}, + {"time_zone", static_cast(&isString), nullptr, "String"} + }; + validateFunctionArgumentTypes(*this, arguments, args, optional_args); + + String timezone; + if (arguments.size() == 3) + 
timezone = extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, allow_nonconst_timezone_arguments); + + return std::make_shared(3, timezone); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto & col_src = *arguments[0].column; + + UInt64 epoch = 0; + if (arguments.size() >= 2 && input_rows_count != 0) + { + const auto & col_epoch = *arguments[1].column; + epoch = col_epoch.getUInt(0); + } + + auto col_res = ColumnDateTime64::create(input_rows_count, 3); + auto & res_data = col_res->getData(); + + if (const auto * col_src_non_const = typeid_cast(&col_src)) + { + const auto & src_data = col_src_non_const->getData(); + for (size_t i = 0; i < input_rows_count; ++i) + res_data[i] = (src_data[i] >> time_shift) + epoch; + } + else if (const auto * col_src_const = typeid_cast(&col_src)) + { + UInt64 src_val = col_src_const->getValue(); + for (size_t i = 0; i < input_rows_count; ++i) + res_data[i] = (src_val >> time_shift) + epoch; + } + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal argument for function {}", name); + + return col_res; + + } +}; + +REGISTER_FUNCTION(SnowflakeIDToDateTime) +{ + { + FunctionDocumentation::Description description = R"(Returns the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as a value of type [DateTime](../data-types/datetime.md).)"; + FunctionDocumentation::Syntax syntax = "snowflakeIDToDateTime(value[, epoch[, time_zone]])"; + FunctionDocumentation::Arguments arguments = { + {"value", "Snowflake ID. [UInt64](../data-types/int-uint.md)"}, + {"epoch", "Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2015-01-01), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md)"}, + {"time_zone", "[Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md)"} + }; + FunctionDocumentation::ReturnedValue returned_value = "The timestamp component of `value` as a [DateTime](../data-types/datetime.md) value."; + FunctionDocumentation::Examples examples = {{"simple", "SELECT snowflakeIDToDateTime(7204436857747984384)", "2024-06-06 10:59:58"}}; + FunctionDocumentation::Categories categories = {"Snowflake ID"}; + + factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); + } + + { + FunctionDocumentation::Description description = R"(Returns the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as a value of type [DateTime64](../data-types/datetime64.md).)"; + FunctionDocumentation::Syntax syntax = "snowflakeIDToDateTime64(value[, epoch[, time_zone]])"; + FunctionDocumentation::Arguments arguments = { + {"value", "Snowflake ID. [UInt64](../data-types/int-uint.md)"}, + {"epoch", "Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2015-01-01), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md)"}, + {"time_zone", "[Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. 
[String](../data-types/string.md)"} + }; + FunctionDocumentation::ReturnedValue returned_value = "The timestamp component of `value` as a [DateTime64](../data-types/datetime64.md) with scale = 3, i.e. millisecond precision."; + FunctionDocumentation::Examples examples = {{"simple", "SELECT snowflakeIDToDateTime64(7204436857747984384)", "2024-06-06 10:59:58"}}; + FunctionDocumentation::Categories categories = {"Snowflake ID"}; + + factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); + } +} + +} diff --git a/src/Functions/tests/gtest_ternary_logic.cpp b/src/Functions/tests/gtest_ternary_logic.cpp deleted file mode 100644 index 5ecafabb361..00000000000 --- a/src/Functions/tests/gtest_ternary_logic.cpp +++ /dev/null @@ -1,354 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// I know that inclusion of .cpp is not good at all -#include // NOLINT - -using namespace DB; -using TernaryValues = std::vector; - -struct LinearCongruentialGenerator -{ - /// Constants from `man lrand48_r`. - static constexpr UInt64 a = 0x5DEECE66D; - static constexpr UInt64 c = 0xB; - - /// And this is from `head -c8 /dev/urandom | xxd -p` - UInt64 current = 0x09826f4a081cee35ULL; - - UInt32 next() - { - current = current * a + c; - return static_cast(current >> 16); - } -}; - -void generateRandomTernaryValue(LinearCongruentialGenerator & gen, Ternary::ResultType * output, size_t size, double false_ratio, double null_ratio) -{ - /// The LinearCongruentialGenerator generates nonnegative integers uniformly distributed over the interval [0, 2^32). - /// See https://linux.die.net/man/3/nrand48 - - double false_percentile = false_ratio; - double null_percentile = false_ratio + null_ratio; - - false_percentile = false_percentile > 1 ? 1 : false_percentile; - null_percentile = null_percentile > 1 ? 1 : null_percentile; - - UInt32 false_threshold = static_cast(static_cast(std::numeric_limits::max()) * false_percentile); - UInt32 null_threshold = static_cast(static_cast(std::numeric_limits::max()) * null_percentile); - - for (Ternary::ResultType * end = output + size; output != end; ++output) - { - UInt32 val = gen.next(); - *output = val < false_threshold ? Ternary::False : (val < null_threshold ? 
Ternary::Null : Ternary::True); - } -} - -template -ColumnPtr createColumnNullable(const Ternary::ResultType * ternary_values, size_t size) -{ - auto nested_column = ColumnVector::create(size); - auto null_map = ColumnUInt8::create(size); - auto & nested_column_data = nested_column->getData(); - auto & null_map_data = null_map->getData(); - - for (size_t i = 0; i < size; ++i) - { - if (ternary_values[i] == Ternary::Null) - { - null_map_data[i] = 1; - nested_column_data[i] = 0; - } - else if (ternary_values[i] == Ternary::True) - { - null_map_data[i] = 0; - nested_column_data[i] = 100; - } - else - { - null_map_data[i] = 0; - nested_column_data[i] = 0; - } - } - - return ColumnNullable::create(std::move(nested_column), std::move(null_map)); -} - -template -ColumnPtr createColumnVector(const Ternary::ResultType * ternary_values, size_t size) -{ - auto column = ColumnVector::create(size); - auto & column_data = column->getData(); - - for (size_t i = 0; i < size; ++i) - { - if (ternary_values[i] == Ternary::True) - { - column_data[i] = 100; - } - else - { - column_data[i] = 0; - } - } - - return column; -} - -template -ColumnPtr createRandomColumn(LinearCongruentialGenerator & gen, TernaryValues & ternary_values) -{ - size_t size = ternary_values.size(); - Ternary::ResultType * ternary_data = ternary_values.data(); - - if constexpr (std::is_same_v) - { - generateRandomTernaryValue(gen, ternary_data, size, 0.3, 0.7); - return createColumnNullable(ternary_data, size); - } - else if constexpr (std::is_same_v>) - { - generateRandomTernaryValue(gen, ternary_data, size, 0.5, 0); - return createColumnVector(ternary_data, size); - } - else - { - auto nested_col = ColumnNothing::create(size); - auto null_map = ColumnUInt8::create(size); - - memset(ternary_data, Ternary::Null, size); - - return ColumnNullable::create(std::move(nested_col), std::move(null_map)); - } -} - -/* The truth table of ternary And and Or operations: - * +-------+-------+---------+--------+ - * | a | b | a And b | a Or b | - * +-------+-------+---------+--------+ - * | False | False | False | False | - * | False | Null | False | Null | - * | False | True | False | True | - * | Null | False | False | Null | - * | Null | Null | Null | Null | - * | Null | True | Null | True | - * | True | False | False | True | - * | True | Null | Null | True | - * | True | True | True | True | - * +-------+-------+---------+--------+ - * - * https://en.wikibooks.org/wiki/Structured_Query_Language/NULLs_and_the_Three_Valued_Logic - */ -template -bool testTernaryLogicTruthTable() -{ - constexpr size_t size = 9; - - Ternary::ResultType col_a_ternary[] = {Ternary::False, Ternary::False, Ternary::False, Ternary::Null, Ternary::Null, Ternary::Null, Ternary::True, Ternary::True, Ternary::True}; - Ternary::ResultType col_b_ternary[] = {Ternary::False, Ternary::Null, Ternary::True, Ternary::False, Ternary::Null, Ternary::True,Ternary::False, Ternary::Null, Ternary::True}; - Ternary::ResultType and_expected_ternary[] = {Ternary::False, Ternary::False, Ternary::False, Ternary::False, Ternary::Null, Ternary::Null,Ternary::False, Ternary::Null, Ternary::True}; - Ternary::ResultType or_expected_ternary[] = {Ternary::False, Ternary::Null, Ternary::True, Ternary::Null, Ternary::Null, Ternary::True,Ternary::True, Ternary::True, Ternary::True}; - Ternary::ResultType * expected_ternary; - - - if constexpr (std::is_same_v) - { - expected_ternary = and_expected_ternary; - } - else - { - expected_ternary = or_expected_ternary; - } - - auto col_a = 
createColumnNullable(col_a_ternary, size); - auto col_b = createColumnNullable(col_b_ternary, size); - ColumnRawPtrs arguments = {col_a.get(), col_b.get()}; - - auto col_res = ColumnUInt8::create(size); - auto & col_res_data = col_res->getData(); - - OperationApplier::apply(arguments, col_res->getData(), false); - - for (size_t i = 0; i < size; ++i) - { - if (col_res_data[i] != expected_ternary[i]) return false; - } - - return true; -} - -template -bool testTernaryLogicOfTwoColumns(size_t size) -{ - LinearCongruentialGenerator gen; - - TernaryValues left_column_ternary(size); - TernaryValues right_column_ternary(size); - TernaryValues expected_ternary(size); - - ColumnPtr left = createRandomColumn(gen, left_column_ternary); - ColumnPtr right = createRandomColumn(gen, right_column_ternary); - - for (size_t i = 0; i < size; ++i) - { - /// Given that False is less than Null and Null is less than True, the And operation can be implemented - /// with std::min, and the Or operation can be implemented with std::max. - if constexpr (std::is_same_v) - { - expected_ternary[i] = std::min(left_column_ternary[i], right_column_ternary[i]); - } - else - { - expected_ternary[i] = std::max(left_column_ternary[i], right_column_ternary[i]); - } - } - - ColumnRawPtrs arguments = {left.get(), right.get()}; - - auto col_res = ColumnUInt8::create(size); - auto & col_res_data = col_res->getData(); - - OperationApplier::apply(arguments, col_res->getData(), false); - - for (size_t i = 0; i < size; ++i) - { - if (col_res_data[i] != expected_ternary[i]) return false; - } - - return true; -} - -TEST(TernaryLogicTruthTable, NestedUInt8) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedUInt16) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedUInt32) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedUInt64) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedInt8) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedInt16) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedInt32) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedInt64) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedFloat32) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedFloat64) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - 
ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTwoColumns, TwoNullable) -{ - bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); - bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTwoColumns, TwoVector) -{ - bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); - bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTwoColumns, TwoNothing) -{ - bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); - bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTwoColumns, NullableVector) -{ - bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); - bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTwoColumns, NullableNothing) -{ - bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); - bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTwoColumns, VectorNothing) -{ - bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); - bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} diff --git a/src/Functions/tryBase64UrlDecode.cpp b/src/Functions/tryBase64URLDecode.cpp similarity index 69% rename from src/Functions/tryBase64UrlDecode.cpp rename to src/Functions/tryBase64URLDecode.cpp index b9aaf4f9273..b44bc7538ee 100644 --- a/src/Functions/tryBase64UrlDecode.cpp +++ b/src/Functions/tryBase64URLDecode.cpp @@ -5,16 +5,16 @@ namespace DB { -REGISTER_FUNCTION(TryBase64UrlDecode) +REGISTER_FUNCTION(TryBase64URLDecode) { - FunctionDocumentation::Description description = R"(Decodes an URL from base64, like base64UrlDecode but returns an empty string in case of an error.)"; - FunctionDocumentation::Syntax syntax = "tryBase64UrlDecode(encodedUrl)"; - FunctionDocumentation::Arguments arguments = {{"encodedUrl", "String column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string."}}; + FunctionDocumentation::Description description = R"(Decodes an URL from base64, like base64URLDecode but returns an empty string in case of an error.)"; + FunctionDocumentation::Syntax syntax = "tryBase64URLDecode(encodedUrl)"; + FunctionDocumentation::Arguments arguments = {{"encodedURL", "String column or constant. 
If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string."}}; FunctionDocumentation::ReturnedValue returned_value = "A string containing the decoded value of the argument."; - FunctionDocumentation::Examples examples = {{"valid", "SELECT tryBase64UrlDecode('aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ')", "https://clickhouse.com"}, {"invalid", "SELECT tryBase64UrlDecode('aHR0cHM6Ly9jbGlja')", ""}}; + FunctionDocumentation::Examples examples = {{"valid", "SELECT tryBase64URLDecode('aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ')", "https://clickhouse.com"}, {"invalid", "SELECT tryBase64UrlDecode('aHR0cHM6Ly9jbGlja')", ""}}; FunctionDocumentation::Categories categories = {"String encoding"}; - factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); + factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); } } diff --git a/src/IO/CascadeWriteBuffer.cpp b/src/IO/CascadeWriteBuffer.cpp index 91a42e77fdb..8b863cb253c 100644 --- a/src/IO/CascadeWriteBuffer.cpp +++ b/src/IO/CascadeWriteBuffer.cpp @@ -83,6 +83,20 @@ void CascadeWriteBuffer::finalizeImpl() } } +void CascadeWriteBuffer::cancelImpl() noexcept +{ + if (curr_buffer) + curr_buffer->position() = position(); + + for (auto & buf : prepared_sources) + { + if (buf) + { + buf->cancel(); + } + } +} + WriteBuffer * CascadeWriteBuffer::setNextBuffer() { if (first_lazy_source_num <= curr_buffer_num && curr_buffer_num < num_sources) diff --git a/src/IO/CascadeWriteBuffer.h b/src/IO/CascadeWriteBuffer.h index a003d11bd8a..7a8b11c6a87 100644 --- a/src/IO/CascadeWriteBuffer.h +++ b/src/IO/CascadeWriteBuffer.h @@ -16,7 +16,7 @@ namespace ErrorCodes * (lazy_sources contains not pointers themself, but their delayed constructors) * * Firtly, CascadeWriteBuffer redirects data to first buffer of the sequence - * If current WriteBuffer cannot receive data anymore, it throws special exception MemoryWriteBuffer::CurrentBufferExhausted in nextImpl() body, + * If current WriteBuffer cannot receive data anymore, it throws special exception WriteBuffer::CurrentBufferExhausted in nextImpl() body, * CascadeWriteBuffer prepare next buffer and continuously redirects data to it. * If there are no buffers anymore CascadeWriteBuffer throws an exception. 
* @@ -48,6 +48,7 @@ public: private: void finalizeImpl() override; + void cancelImpl() noexcept override; WriteBuffer * setNextBuffer(); diff --git a/src/IO/MemoryReadWriteBuffer.h b/src/IO/MemoryReadWriteBuffer.h index d7ca992aa44..a7d3e388cb3 100644 --- a/src/IO/MemoryReadWriteBuffer.h +++ b/src/IO/MemoryReadWriteBuffer.h @@ -16,11 +16,11 @@ namespace DB class MemoryWriteBuffer : public WriteBuffer, public IReadableWriteBuffer, boost::noncopyable, private Allocator { public: - /// Special exception to throw when the current WriteBuffer cannot receive data + /// Special exception to throw when the current MemoryWriteBuffer cannot receive data class CurrentBufferExhausted : public std::exception { public: - const char * what() const noexcept override { return "MemoryWriteBuffer limit is exhausted"; } + const char * what() const noexcept override { return "WriteBuffer limit is exhausted"; } }; /// Use max_total_size_ = 0 for unlimited storage diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 9229342b8c1..55441cfb86b 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -30,10 +30,6 @@ #include -#ifdef ADDRESS_SANITIZER -#include -#endif - namespace ProfileEvents { extern const Event S3WriteRequestsErrors; @@ -880,14 +876,7 @@ void ClientCacheRegistry::clearCacheForAll() ClientFactory::ClientFactory() { aws_options = Aws::SDKOptions{}; - { -#ifdef ADDRESS_SANITIZER - /// Leak sanitizer (part of address sanitizer) thinks that memory in OpenSSL (called by AWS SDK) is allocated but not - /// released. Actually, the memory is released at the end of the program (ClientFactory is a singleton, see the dtor). - __lsan::ScopedDisabler lsan_disabler; -#endif - Aws::InitAPI(aws_options); - } + Aws::InitAPI(aws_options); Aws::Utils::Logging::InitializeAWSLogging(std::make_shared(false)); Aws::Http::SetHttpClientFactory(std::make_shared()); } diff --git a/src/IO/S3/Credentials.cpp b/src/IO/S3/Credentials.cpp index fa9d018eaa6..dfb7727fca4 100644 --- a/src/IO/S3/Credentials.cpp +++ b/src/IO/S3/Credentials.cpp @@ -9,6 +9,21 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; } +namespace S3 +{ + std::string tryGetRunningAvailabilityZone() + { + try + { + return getRunningAvailabilityZone(); + } + catch (...) + { + tryLogCurrentException("tryGetRunningAvailabilityZone"); + return ""; + } + } +} } #if USE_AWS_S3 diff --git a/src/IO/S3/Credentials.h b/src/IO/S3/Credentials.h index b8698d9b302..95297ab0538 100644 --- a/src/IO/S3/Credentials.h +++ b/src/IO/S3/Credentials.h @@ -24,6 +24,7 @@ static inline constexpr char GCP_METADATA_SERVICE_ENDPOINT[] = "http://metadata. /// getRunningAvailabilityZone returns the availability zone of the underlying compute resources where the current process runs. 
std::string getRunningAvailabilityZone(); +std::string tryGetRunningAvailabilityZone(); class AWSEC2MetadataClient : public Aws::Internal::AWSHttpResourceClient { @@ -195,6 +196,7 @@ namespace DB namespace S3 { std::string getRunningAvailabilityZone(); +std::string tryGetRunningAvailabilityZone(); } } diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 1cef43530e0..aab7a39534d 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -535,7 +535,7 @@ void PocoHTTPClient::makeRequestInternalImpl( const static std::string_view needle = ""; if (auto it = std::search(response_string.begin(), response_string.end(), std::default_searcher(needle.begin(), needle.end())); it != response_string.end()) { - LOG_WARNING(log, "Response for request contain tag in body, settings internal server error (500 code)"); + LOG_WARNING(log, "Response for the request contains an tag in the body, will treat it as an internal server error (code 500)"); response->SetResponseCode(Aws::Http::HttpResponseCode::INTERNAL_SERVER_ERROR); addMetric(request, S3MetricType::Errors); diff --git a/src/IO/WriteBuffer.cpp b/src/IO/WriteBuffer.cpp index bcc7445486e..a86eb4ccea2 100644 --- a/src/IO/WriteBuffer.cpp +++ b/src/IO/WriteBuffer.cpp @@ -11,7 +11,7 @@ namespace DB WriteBuffer::~WriteBuffer() { // That destructor could be call with finalized=false in case of exceptions - if (count() > 0 && !finalized) + if (count() > 0 && !finalized && !canceled) { /// It is totally OK to destroy instance without finalization when an exception occurs /// However it is suspicious to destroy instance without finalization at the green path @@ -20,7 +20,7 @@ WriteBuffer::~WriteBuffer() LoggerPtr log = getLogger("WriteBuffer"); LOG_ERROR( log, - "WriteBuffer is not finalized when destructor is called. " + "WriteBuffer is neither finalized nor canceled when destructor is called. " "No exceptions in flight are detected. " "The file might not be written at all or might be truncated. " "Stack trace: {}", @@ -30,4 +30,13 @@ WriteBuffer::~WriteBuffer() } } +void WriteBuffer::cancel() noexcept +{ + if (canceled || finalized) + return; + + LockMemoryExceptionInThread lock(VariableContext::Global); + cancelImpl(); + canceled = true; +} } diff --git a/src/IO/WriteBuffer.h b/src/IO/WriteBuffer.h index ef4e0058ec3..4759f96a235 100644 --- a/src/IO/WriteBuffer.h +++ b/src/IO/WriteBuffer.h @@ -59,6 +59,7 @@ public: */ pos = working_buffer.begin(); bytes += bytes_in_buffer; + throw; } @@ -75,7 +76,6 @@ public: next(); } - void write(const char * from, size_t n) { if (finalized) @@ -121,6 +121,9 @@ public: if (finalized) return; + if (canceled) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot finalize buffer after cancellation."); + LockMemoryExceptionInThread lock(VariableContext::Global); try { @@ -130,11 +133,15 @@ public: catch (...) { pos = working_buffer.begin(); - finalized = true; + + cancel(); + throw; } } + void cancel() noexcept; + /// Wait for data to be reliably written. Mainly, call fsync for fd. /// May be called after finalize() if needed. virtual void sync() @@ -150,7 +157,12 @@ protected: next(); } + virtual void cancelImpl() noexcept + { + } + bool finalized = false; + bool canceled = false; private: /** Write the data in the buffer (from the beginning of the buffer to the current position). 
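The WriteBuffer changes above add a second terminal state next to finalize(): cancel() is noexcept, delegates to cancelImpl() for best-effort cleanup, a failed finalize() now cancels the buffer instead of marking it finalized, and destructors only warn when a buffer ends up neither finalized nor canceled. A minimal standalone sketch of that contract, using a toy class in place of the real WriteBuffer hierarchy (the class and member names below are illustrative, not ClickHouse API):

#include <iostream>
#include <stdexcept>
#include <string>

class ToyWriteBuffer
{
public:
    void write(const std::string & s)
    {
        if (finalized || canceled)
            throw std::logic_error("write after finalize()/cancel()");
        pending += s;
    }

    void finalize()
    {
        if (finalized)
            return;
        if (canceled)
            throw std::logic_error("cannot finalize a canceled buffer");
        try
        {
            flush();            /// may throw, e.g. on a network error
            finalized = true;
        }
        catch (...)
        {
            cancel();           /// mirrors the patch: a failed finalize() cancels the buffer
            throw;
        }
    }

    void cancel() noexcept
    {
        if (finalized || canceled)
            return;
        pending.clear();        /// best-effort cleanup, the analogue of cancelImpl()
        canceled = true;
    }

    ~ToyWriteBuffer()
    {
        if (!finalized && !canceled)
            std::cerr << "buffer destroyed neither finalized nor canceled\n";
    }

private:
    void flush() { std::cout << pending; }

    std::string pending;
    bool finalized = false;
    bool canceled = false;
};

int main()
{
    ToyWriteBuffer buf;
    try
    {
        buf.write("hello\n");
        buf.finalize();         /// success path: the buffer ends its life finalized
    }
    catch (...)
    {
        buf.cancel();           /// failure path: the buffer ends its life canceled
        return 1;
    }
    return 0;
}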
diff --git a/src/IO/WriteBufferDecorator.h b/src/IO/WriteBufferDecorator.h index 88161f8d232..109c2bd24e4 100644 --- a/src/IO/WriteBufferDecorator.h +++ b/src/IO/WriteBufferDecorator.h @@ -47,6 +47,11 @@ public: } } + void cancelImpl() noexcept override + { + out->cancel(); + } + WriteBuffer * getNestedBuffer() { return out; } protected: diff --git a/src/IO/WriteBufferFromFile.cpp b/src/IO/WriteBufferFromFile.cpp index d641e553671..37b1161356f 100644 --- a/src/IO/WriteBufferFromFile.cpp +++ b/src/IO/WriteBufferFromFile.cpp @@ -79,7 +79,8 @@ WriteBufferFromFile::~WriteBufferFromFile() try { - finalize(); + if (!canceled) + finalize(); } catch (...) { @@ -111,7 +112,8 @@ void WriteBufferFromFile::close() if (fd < 0) return; - finalize(); + if (!canceled) + finalize(); if (0 != ::close(fd)) throw Exception(ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file"); diff --git a/src/IO/WriteBufferFromFileDecorator.cpp b/src/IO/WriteBufferFromFileDecorator.cpp index 0e4e5e13a86..b1e7d843d92 100644 --- a/src/IO/WriteBufferFromFileDecorator.cpp +++ b/src/IO/WriteBufferFromFileDecorator.cpp @@ -28,6 +28,12 @@ void WriteBufferFromFileDecorator::finalizeImpl() } } +void WriteBufferFromFileDecorator::cancelImpl() noexcept +{ + SwapHelper swap(*this, *impl); + impl->cancel(); +} + WriteBufferFromFileDecorator::~WriteBufferFromFileDecorator() { /// It is not a mistake that swap is called here diff --git a/src/IO/WriteBufferFromFileDecorator.h b/src/IO/WriteBufferFromFileDecorator.h index 5344bb1425c..07f843986bb 100644 --- a/src/IO/WriteBufferFromFileDecorator.h +++ b/src/IO/WriteBufferFromFileDecorator.h @@ -24,6 +24,8 @@ public: protected: void finalizeImpl() override; + void cancelImpl() noexcept override; + std::unique_ptr impl; private: diff --git a/src/IO/WriteBufferFromFileDescriptor.cpp b/src/IO/WriteBufferFromFileDescriptor.cpp index a758f99458d..f1207edc55b 100644 --- a/src/IO/WriteBufferFromFileDescriptor.cpp +++ b/src/IO/WriteBufferFromFileDescriptor.cpp @@ -107,7 +107,8 @@ WriteBufferFromFileDescriptor::~WriteBufferFromFileDescriptor() { try { - finalize(); + if (!canceled) + finalize(); } catch (...) { diff --git a/src/IO/WriteBufferFromPocoSocket.cpp b/src/IO/WriteBufferFromPocoSocket.cpp index 10d9fd131cd..5ed4dbdc787 100644 --- a/src/IO/WriteBufferFromPocoSocket.cpp +++ b/src/IO/WriteBufferFromPocoSocket.cpp @@ -197,7 +197,8 @@ WriteBufferFromPocoSocket::~WriteBufferFromPocoSocket() { try { - finalize(); + if (!canceled) + finalize(); } catch (...) { diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index cd9949862ca..3682e49b018 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -224,6 +224,11 @@ void WriteBufferFromS3::finalizeImpl() } } +void WriteBufferFromS3::cancelImpl() noexcept +{ + tryToAbortMultipartUpload(); +} + String WriteBufferFromS3::getVerboseLogDetails() const { String multipart_upload_details; @@ -246,7 +251,7 @@ String WriteBufferFromS3::getShortLogDetails() const bucket, key, multipart_upload_details); } -void WriteBufferFromS3::tryToAbortMultipartUpload() +void WriteBufferFromS3::tryToAbortMultipartUpload() noexcept { try { @@ -264,8 +269,19 @@ WriteBufferFromS3::~WriteBufferFromS3() { LOG_TRACE(limitedLog, "Close WriteBufferFromS3. {}.", getShortLogDetails()); + if (canceled) + { + LOG_INFO( + log, + "WriteBufferFromS3 was canceled." + "The file might not be written to S3. 
" + "{}.", + getVerboseLogDetails()); + return; + } + /// That destructor could be call with finalized=false in case of exceptions - if (!finalized) + if (!finalized && !canceled) { LOG_INFO( log, diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 973ca4c7526..b026da607c5 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -54,6 +54,8 @@ private: /// Receives response from the server after sending all data. void finalizeImpl() override; + void cancelImpl() noexcept override; + String getVerboseLogDetails() const; String getShortLogDetails() const; @@ -71,7 +73,7 @@ private: void createMultipartUpload(); void completeMultipartUpload(); void abortMultipartUpload(); - void tryToAbortMultipartUpload(); + void tryToAbortMultipartUpload() noexcept; S3::PutObjectRequest getPutRequest(PartData & data); void makeSinglepartUpload(PartData && data); diff --git a/src/IO/WriteBufferFromVector.h b/src/IO/WriteBufferFromVector.h index 1ea32af2968..17a329d401d 100644 --- a/src/IO/WriteBufferFromVector.h +++ b/src/IO/WriteBufferFromVector.h @@ -63,7 +63,8 @@ public: ~WriteBufferFromVector() override { - finalize(); + if (!canceled) + finalize(); } private: diff --git a/src/Interpreters/Access/InterpreterGrantQuery.cpp b/src/Interpreters/Access/InterpreterGrantQuery.cpp index 6a46ac9c330..ac3b549a576 100644 --- a/src/Interpreters/Access/InterpreterGrantQuery.cpp +++ b/src/Interpreters/Access/InterpreterGrantQuery.cpp @@ -118,7 +118,7 @@ namespace /// Checks if the current user has enough access rights granted with grant option to grant or revoke specified access rights. void checkGrantOption( const AccessControl & access_control, - const ContextAccess & current_user_access, + const ContextAccessWrapper & current_user_access, const std::vector & grantees_from_query, bool & need_check_grantees_are_allowed, const AccessRightsElements & elements_to_grant, @@ -200,7 +200,7 @@ namespace /// Checks if the current user has enough roles granted with admin option to grant or revoke specified roles. void checkAdminOption( const AccessControl & access_control, - const ContextAccess & current_user_access, + const ContextAccessWrapper & current_user_access, const std::vector & grantees_from_query, bool & need_check_grantees_are_allowed, const std::vector & roles_to_grant, @@ -277,7 +277,7 @@ namespace /// This function is less accurate than checkAdminOption() because it cannot use any information about /// granted roles the grantees currently have (due to those grantees are located on multiple nodes, /// we just don't have the full information about them). - void checkAdminOptionForExecutingOnCluster(const ContextAccess & current_user_access, + void checkAdminOptionForExecutingOnCluster(const ContextAccessWrapper & current_user_access, const std::vector roles_to_grant, const RolesOrUsersSet & roles_to_revoke) { @@ -376,7 +376,7 @@ namespace /// Calculates all available rights to grant with current user intersection. 
void calculateCurrentGrantRightsWithIntersection( AccessRights & rights, - std::shared_ptr current_user_access, + std::shared_ptr current_user_access, const AccessRightsElements & elements_to_grant) { AccessRightsElements current_user_grantable_elements; diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 54db8980e12..34f3e0a98bd 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -786,9 +786,6 @@ Block ActionsDAG::updateHeader(const Block & header) const for (auto & col : result_columns) res.insert(std::move(col)); - if (isInputProjected()) - return res; - res.reserve(header.columns() - pos_to_remove.size()); for (size_t i = 0; i < header.columns(); i++) { @@ -1150,8 +1147,33 @@ void ActionsDAG::project(const NamesWithAliases & projection) } removeUnusedActions(); - projectInput(); - projected_output = true; +} + +void ActionsDAG::appendInputsForUnusedColumns(const Block & sample_block) +{ + std::unordered_map> names_map; + size_t num_columns = sample_block.columns(); + for (size_t pos = 0; pos < num_columns; ++pos) + names_map[sample_block.getByPosition(pos).name].push_back(pos); + + for (const auto * input : inputs) + { + auto & positions = names_map[input->result_name]; + if (positions.empty()) + throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, + "Not found column {} in block {}", input->result_name, sample_block.dumpStructure()); + + positions.pop_front(); + } + + for (const auto & [_, positions] : names_map) + { + for (auto pos : positions) + { + const auto & col = sample_block.getByPosition(pos); + addInput(col.name, col.type); + } + } } bool ActionsDAG::tryRestoreColumn(const std::string & column_name) @@ -1227,8 +1249,6 @@ bool ActionsDAG::removeUnusedResult(const std::string & column_name) ActionsDAGPtr ActionsDAG::clone() const { auto actions = std::make_shared(); - actions->project_input = project_input; - actions->projected_output = projected_output; std::unordered_map copy_map; @@ -1322,9 +1342,6 @@ std::string ActionsDAG::dumpDAG() const out << ' ' << map[node]; out << '\n'; - out << "Project input: " << project_input << '\n'; - out << "Projected output: " << projected_output << '\n'; - return out.str(); } @@ -1409,7 +1426,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( FunctionOverloadResolverPtr func_builder_materialize = std::make_unique(std::make_shared()); - std::map> inputs; + std::unordered_map> inputs; if (mode == MatchColumnsMode::Name) { size_t input_nodes_size = actions_dag->inputs.size(); @@ -1525,8 +1542,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( } actions_dag->outputs.swap(projection); - actions_dag->removeUnusedActions(); - actions_dag->projectInput(); + actions_dag->removeUnusedActions(false); return actions_dag; } @@ -1584,10 +1600,6 @@ void ActionsDAG::mergeInplace(ActionsDAG && second) auto it = first_result.find(input_node->result_name); if (it == first_result.end() || it->second.empty()) { - if (first.project_input) - throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, - "Cannot find column {} in ActionsDAG result", input_node->result_name); - first.inputs.push_back(input_node); } else @@ -1623,13 +1635,6 @@ void ActionsDAG::mergeInplace(ActionsDAG && second) } } - /// Update output nodes. - if (second.project_input) - { - first.outputs.swap(second.outputs); - first.project_input = true; - } - else { /// Add not removed result from first actions. 
for (const auto * output_node : first.outputs) @@ -1645,8 +1650,6 @@ void ActionsDAG::mergeInplace(ActionsDAG && second) } first.nodes.splice(first.nodes.end(), std::move(second.nodes)); - - first.projected_output = second.projected_output; } void ActionsDAG::mergeNodes(ActionsDAG && second, NodeRawConstPtrs * out_outputs) @@ -2042,7 +2045,6 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & } auto res = split(split_nodes); - res.second->project_input = project_input; return res; } @@ -2086,7 +2088,6 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBySortingDescription(const NameS dumpDAG()); auto res = split(split_nodes); - res.second->project_input = project_input; return res; } @@ -2158,7 +2159,6 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsForFilter(const std::string & co std::unordered_set split_nodes = {node}; auto res = split(split_nodes); - res.second->project_input = project_input; return res; } @@ -2745,11 +2745,7 @@ void ActionsDAG::removeUnusedConjunctions(NodeRawConstPtrs rejected_conjunctions std::unordered_set used_inputs; for (const auto * input : inputs) - { - if (removes_filter && input == predicate) - continue; used_inputs.insert(input); - } removeUnusedActions(used_inputs); } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 150fa84762f..c9974fd849c 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -103,13 +103,11 @@ private: NodeRawConstPtrs inputs; NodeRawConstPtrs outputs; - bool project_input = false; - bool projected_output = false; - public: ActionsDAG() = default; ActionsDAG(ActionsDAG &&) = default; ActionsDAG(const ActionsDAG &) = delete; + ActionsDAG & operator=(ActionsDAG &&) = default; ActionsDAG & operator=(const ActionsDAG &) = delete; explicit ActionsDAG(const NamesAndTypesList & inputs_); explicit ActionsDAG(const ColumnsWithTypeAndName & inputs_); @@ -168,9 +166,12 @@ public: /// Call addAlias several times. void addAliases(const NamesWithAliases & aliases); - /// Add alias actions and remove unused columns from outputs. Also specify result columns order in outputs. + /// Add alias actions. Also specify result columns order in outputs. void project(const NamesWithAliases & projection); + /// Add input for every column from sample_block which is not mapped to existing input. + void appendInputsForUnusedColumns(const Block & sample_block); + /// If column is not in outputs, try to find it in nodes and insert back into outputs. bool tryRestoreColumn(const std::string & column_name); @@ -179,10 +180,6 @@ public: /// Return true if column was removed from inputs. bool removeUnusedResult(const std::string & column_name); - void projectInput(bool project = true) { project_input = project; } - bool isInputProjected() const { return project_input; } - bool isOutputProjected() const { return projected_output; } - /// Remove actions that are not needed to compute output nodes void removeUnusedActions(bool allow_remove_inputs = true, bool allow_constant_folding = true); @@ -510,4 +507,15 @@ struct ActionDAGNodes ActionsDAG::NodeRawConstPtrs nodes; }; +/// Helper for query analysis. +/// If project_input is set, all columns not found in inputs should be removed. +/// Now, we do it before adding a step to query plan by calling appendInputsForUnusedColumns. 
+struct ActionsAndProjectInputsFlag +{ + ActionsDAG dag; + bool project_input = false; +}; + +using ActionsAndProjectInputsFlagPtr = std::shared_ptr; + } diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 0bdd4c089f1..c3285d73145 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -102,7 +103,7 @@ static size_t getTypeDepth(const DataTypePtr & type) /// 33.33 in the set is converted to 33.3, but it is not equal to 33.3 in the column, so the result should still be empty. /// We can not include values that don't represent any possible value from the type of filtered column to the set. template -static Block createBlockFromCollection(const Collection & collection, const DataTypes & types, bool transform_null_in) +static Block createBlockFromCollection(const Collection & collection, const DataTypes & value_types, const DataTypes & types, bool transform_null_in) { size_t columns_num = types.size(); MutableColumns columns(columns_num); @@ -113,11 +114,12 @@ static Block createBlockFromCollection(const Collection & collection, const Data } Row tuple_values; - for (const auto & value : collection) + for (size_t collection_index = 0; collection_index < collection.size(); ++collection_index) { + const auto& value = collection[collection_index]; if (columns_num == 1) { - auto field = convertFieldToTypeStrict(value, *types[0]); + auto field = convertFieldToTypeStrict(value, *value_types[collection_index], *types[0]); bool need_insert_null = transform_null_in && types[0]->isNullable(); if (field && (!field->isNull() || need_insert_null)) columns[0]->insert(*field); @@ -130,7 +132,6 @@ static Block createBlockFromCollection(const Collection & collection, const Data const auto & tuple = value.template get(); size_t tuple_size = tuple.size(); - if (tuple_size != columns_num) throw Exception(ErrorCodes::INCORRECT_ELEMENT_OF_SET, "Incorrect size of tuple in set: {} instead of {}", tuple_size, columns_num); @@ -138,10 +139,13 @@ static Block createBlockFromCollection(const Collection & collection, const Data if (tuple_values.empty()) tuple_values.resize(tuple_size); + const DataTypePtr & value_type = value_types[collection_index]; + const DataTypes & tuple_value_type = typeid_cast(value_type.get())->getElements(); + size_t i = 0; for (; i < tuple_size; ++i) { - auto converted_field = convertFieldToTypeStrict(tuple[i], *types[i]); + auto converted_field = convertFieldToTypeStrict(tuple[i], *tuple_value_type[i], *types[i]); if (!converted_field) break; tuple_values[i] = std::move(*converted_field); @@ -317,16 +321,25 @@ Block createBlockForSet( if (left_type_depth == right_type_depth) { Array array{right_arg_value}; - block = createBlockFromCollection(array, set_element_types, tranform_null_in); + DataTypes value_types{right_arg_type}; + block = createBlockFromCollection(array, value_types, set_element_types, tranform_null_in); } /// 1 in (1, 2); (1, 2) in ((1, 2), (3, 4)); etc. 
else if (left_type_depth + 1 == right_type_depth) { auto type_index = right_arg_type->getTypeId(); if (type_index == TypeIndex::Tuple) - block = createBlockFromCollection(right_arg_value.get(), set_element_types, tranform_null_in); + { + const DataTypes & value_types = assert_cast(right_arg_type.get())->getElements(); + block = createBlockFromCollection(right_arg_value.get(), value_types, set_element_types, tranform_null_in); + } else if (type_index == TypeIndex::Array) - block = createBlockFromCollection(right_arg_value.get(), set_element_types, tranform_null_in); + { + const auto* right_arg_array_type = assert_cast(right_arg_type.get()); + size_t right_arg_array_size = right_arg_value.get().size(); + DataTypes value_types(right_arg_array_size, right_arg_array_type->getNestedType()); + block = createBlockFromCollection(right_arg_value.get(), value_types, set_element_types, tranform_null_in); + } else throw_unsupported_type(right_arg_type); } @@ -392,6 +405,9 @@ Block createBlockForSet( } +ScopeStack::Level::Level() = default; +ScopeStack::Level::~Level() = default; +ScopeStack::Level::Level(Level &&) noexcept = default; FutureSetPtr makeExplicitSet( const ASTFunction * node, const ActionsDAG & actions, ContextPtr context, PreparedSets & prepared_sets) @@ -486,16 +502,12 @@ public: } }; -ScopeStack::Level::~Level() = default; -ScopeStack::Level::Level() = default; -ScopeStack::Level::Level(Level &&) noexcept = default; - ActionsMatcher::Data::Data( ContextPtr context_, SizeLimits set_size_limit_, size_t subquery_depth_, std::reference_wrapper source_columns_, - ActionsDAGPtr actions_dag, + ActionsDAG actions_dag, PreparedSetsPtr prepared_sets_, bool no_subqueries_, bool no_makeset_, @@ -531,13 +543,13 @@ std::vector ActionsMatcher::Data::getAllColumnNames() const return index.getAllNames(); } -ScopeStack::ScopeStack(ActionsDAGPtr actions_dag, ContextPtr context_) : WithContext(context_) +ScopeStack::ScopeStack(ActionsDAG actions_dag, ContextPtr context_) : WithContext(context_) { auto & level = stack.emplace_back(); level.actions_dag = std::move(actions_dag); - level.index = std::make_unique(level.actions_dag->getOutputs()); + level.index = std::make_unique(level.actions_dag.getOutputs()); - for (const auto & node : level.actions_dag->getOutputs()) + for (const auto & node : level.actions_dag.getOutputs()) if (node->type == ActionsDAG::ActionType::INPUT) level.inputs.emplace(node->result_name); } @@ -545,22 +557,21 @@ ScopeStack::ScopeStack(ActionsDAGPtr actions_dag, ContextPtr context_) : WithCon void ScopeStack::pushLevel(const NamesAndTypesList & input_columns) { auto & level = stack.emplace_back(); - level.actions_dag = std::make_shared(); - level.index = std::make_unique(level.actions_dag->getOutputs()); + level.index = std::make_unique(level.actions_dag.getOutputs()); const auto & prev = stack[stack.size() - 2]; for (const auto & input_column : input_columns) { - const auto & node = level.actions_dag->addInput(input_column.name, input_column.type); + const auto & node = level.actions_dag.addInput(input_column.name, input_column.type); level.index->addNode(&node); level.inputs.emplace(input_column.name); } - for (const auto & node : prev.actions_dag->getOutputs()) + for (const auto & node : prev.actions_dag.getOutputs()) { if (!level.index->contains(node->result_name)) { - const auto & input = level.actions_dag->addInput({node->column, node->result_type, node->result_name}); + const auto & input = level.actions_dag.addInput({node->column, node->result_type, node->result_name}); 
level.index->addNode(&input); } } @@ -585,12 +596,12 @@ size_t ScopeStack::getColumnLevel(const std::string & name) void ScopeStack::addColumn(ColumnWithTypeAndName column) { - const auto & node = stack[0].actions_dag->addColumn(std::move(column)); + const auto & node = stack[0].actions_dag.addColumn(std::move(column)); stack[0].index->addNode(&node); for (size_t j = 1; j < stack.size(); ++j) { - const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name}); + const auto & input = stack[j].actions_dag.addInput({node.column, node.result_type, node.result_name}); stack[j].index->addNode(&input); } } @@ -599,12 +610,12 @@ void ScopeStack::addAlias(const std::string & name, std::string alias) { auto level = getColumnLevel(name); const auto & source = stack[level].index->getNode(name); - const auto & node = stack[level].actions_dag->addAlias(source, std::move(alias)); + const auto & node = stack[level].actions_dag.addAlias(source, std::move(alias)); stack[level].index->addNode(&node); for (size_t j = level + 1; j < stack.size(); ++j) { - const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name}); + const auto & input = stack[j].actions_dag.addInput({node.column, node.result_type, node.result_name}); stack[j].index->addNode(&input); } } @@ -618,12 +629,12 @@ void ScopeStack::addArrayJoin(const std::string & source_name, std::string resul throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression with arrayJoin cannot depend on lambda argument: {}", source_name); - const auto & node = stack.front().actions_dag->addArrayJoin(*source_node, std::move(result_name)); + const auto & node = stack.front().actions_dag.addArrayJoin(*source_node, std::move(result_name)); stack.front().index->addNode(&node); for (size_t j = 1; j < stack.size(); ++j) { - const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name}); + const auto & input = stack[j].actions_dag.addInput({node.column, node.result_type, node.result_name}); stack[j].index->addNode(&input); } } @@ -642,17 +653,17 @@ void ScopeStack::addFunction( for (const auto & argument : argument_names) children.push_back(&stack[level].index->getNode(argument)); - const auto & node = stack[level].actions_dag->addFunction(function, std::move(children), std::move(result_name)); + const auto & node = stack[level].actions_dag.addFunction(function, std::move(children), std::move(result_name)); stack[level].index->addNode(&node); for (size_t j = level + 1; j < stack.size(); ++j) { - const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name}); + const auto & input = stack[j].actions_dag.addInput({node.column, node.result_type, node.result_name}); stack[j].index->addNode(&input); } } -ActionsDAGPtr ScopeStack::popLevel() +ActionsDAG ScopeStack::popLevel() { auto res = std::move(stack.back().actions_dag); stack.pop_back(); @@ -661,12 +672,12 @@ ActionsDAGPtr ScopeStack::popLevel() std::string ScopeStack::dumpNames() const { - return stack.back().actions_dag->dumpNames(); + return stack.back().actions_dag.dumpNames(); } const ActionsDAG & ScopeStack::getLastActions() const { - return *stack.back().actions_dag; + return stack.back().actions_dag; } const ScopeStack::Index & ScopeStack::getLastActionsIndex() const @@ -989,7 +1000,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data.set_size_limit, data.subquery_depth, data.source_columns, - 
std::make_shared(data.source_columns), + ActionsDAG(data.source_columns), data.prepared_sets, data.no_subqueries, data.no_makeset, @@ -1008,10 +1019,10 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & } auto dag = index_hint_data.getActions(); - dag->project(args); + dag.project(args); auto index_hint = std::make_shared(); - index_hint->setActions(std::move(dag)); + index_hint->setActions(std::make_shared(std::move(dag))); // Arguments are removed. We add function instead of constant column to avoid constant folding. data.addFunction(std::make_unique(index_hint), {}, column_name); @@ -1271,10 +1282,10 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & auto lambda_dag = data.actions_stack.popLevel(); String result_name = lambda->arguments->children.at(1)->getColumnName(); - lambda_dag->removeUnusedActions(Names(1, result_name)); + lambda_dag.removeUnusedActions(Names(1, result_name)); auto lambda_actions = std::make_shared( - lambda_dag, + std::make_shared(std::move(lambda_dag)), ExpressionActionsSettings::fromContext(data.getContext(), CompileExpressions::yes)); DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type; diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 046c7387ee8..46d2d60e461 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -9,6 +10,7 @@ #include #include #include +#include namespace DB { @@ -43,20 +45,20 @@ struct ScopeStack : WithContext struct Level { - ActionsDAGPtr actions_dag; + ActionsDAG actions_dag; IndexPtr index; NameSet inputs; + ~Level(); Level(); Level(Level &&) noexcept; - ~Level(); }; - using Levels = std::vector; + using Levels = std::deque; Levels stack; - ScopeStack(ActionsDAGPtr actions_dag, ContextPtr context_); + ScopeStack(ActionsDAG actions_dag, ContextPtr context_); void pushLevel(const NamesAndTypesList & input_columns); @@ -67,7 +69,7 @@ struct ScopeStack : WithContext void addArrayJoin(const std::string & source_name, std::string result_name); void addFunction(const FunctionOverloadResolverPtr & function, const Names & argument_names, std::string result_name); - ActionsDAGPtr popLevel(); + ActionsDAG popLevel(); const ActionsDAG & getLastActions() const; const Index & getLastActionsIndex() const; @@ -147,7 +149,7 @@ public: SizeLimits set_size_limit_, size_t subquery_depth_, std::reference_wrapper source_columns_, - ActionsDAGPtr actions_dag, + ActionsDAG actions_dag, PreparedSetsPtr prepared_sets_, bool no_subqueries_, bool no_makeset_, @@ -182,7 +184,7 @@ public: actions_stack.addFunction(function, argument_names, std::move(result_name)); } - ActionsDAGPtr getActions() + ActionsDAG getActions() { return actions_stack.popLevel(); } diff --git a/src/Interpreters/AggregationCommon.h b/src/Interpreters/AggregationCommon.h index ab078d1c5e5..43c80d361d1 100644 --- a/src/Interpreters/AggregationCommon.h +++ b/src/Interpreters/AggregationCommon.h @@ -90,10 +90,7 @@ void fillFixedBatch(size_t keys_size, const ColumnRawPtrs & key_columns, const S /// Note: here we violate strict aliasing. /// It should be ok as log as we do not reffer to any value from `out` before filling. 
const char * source = static_cast(column)->getRawDataBegin(); - size_t offset_to = offset; - if constexpr (std::endian::native == std::endian::big) - offset_to = sizeof(Key) - sizeof(T) - offset; - T * dest = reinterpret_cast(reinterpret_cast(out.data()) + offset_to); + T * dest = reinterpret_cast(reinterpret_cast(out.data()) + offset); fillFixedBatch(num_rows, reinterpret_cast(source), dest); /// NOLINT(bugprone-sizeof-expression) offset += sizeof(T); } diff --git a/src/Interpreters/AggregationMethod.cpp b/src/Interpreters/AggregationMethod.cpp index 3ff4f0cae43..0fc789528b8 100644 --- a/src/Interpreters/AggregationMethod.cpp +++ b/src/Interpreters/AggregationMethod.cpp @@ -160,10 +160,7 @@ void AggregationMethodKeysFixedinsertData(reinterpret_cast(&key) + offset_to, size); + observed_column->insertData(reinterpret_cast(&key) + pos, size); pos += size; } } diff --git a/src/Interpreters/AsynchronousMetricLog.h b/src/Interpreters/AsynchronousMetricLog.h index 739b2aa5b56..2ce1d929592 100644 --- a/src/Interpreters/AsynchronousMetricLog.h +++ b/src/Interpreters/AsynchronousMetricLog.h @@ -8,8 +8,6 @@ #include #include -#include -#include #include diff --git a/src/Interpreters/Cache/FileCache_fwd.h b/src/Interpreters/Cache/FileCache_fwd.h index 55453b78ead..8d2a9d0a2da 100644 --- a/src/Interpreters/Cache/FileCache_fwd.h +++ b/src/Interpreters/Cache/FileCache_fwd.h @@ -6,7 +6,7 @@ namespace DB static constexpr int FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 32 * 1024 * 1024; /// 32Mi static constexpr int FILECACHE_DEFAULT_FILE_SEGMENT_ALIGNMENT = 4 * 1024 * 1024; /// 4Mi -static constexpr int FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_THREADS = 5; +static constexpr int FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_THREADS = 0; static constexpr int FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_QUEUE_SIZE_LIMIT = 5000; static constexpr int FILECACHE_DEFAULT_LOAD_METADATA_THREADS = 16; static constexpr int FILECACHE_DEFAULT_MAX_ELEMENTS = 10000000; diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 61a356fa3c3..838ca0b491e 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -187,13 +187,6 @@ size_t FileSegment::getDownloadedSize() const return downloaded_size; } -void FileSegment::setDownloadedSize(size_t delta) -{ - auto lk = lock(); - downloaded_size += delta; - assert(downloaded_size == std::filesystem::file_size(getPath())); -} - bool FileSegment::isDownloaded() const { auto lk = lock(); @@ -311,6 +304,11 @@ FileSegment::RemoteFileReaderPtr FileSegment::getRemoteFileReader() return remote_file_reader; } +FileSegment::LocalCacheWriterPtr FileSegment::getLocalCacheWriter() +{ + return cache_writer; +} + void FileSegment::resetRemoteFileReader() { auto lk = lock(); @@ -340,33 +338,31 @@ void FileSegment::setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_) remote_file_reader = remote_file_reader_; } -void FileSegment::write(char * from, size_t size, size_t offset) +void FileSegment::write(char * from, size_t size, size_t offset_in_file) { ProfileEventTimeIncrement watch(ProfileEvents::FileSegmentWriteMicroseconds); - - if (!size) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Writing zero size is not allowed"); - + auto file_segment_path = getPath(); { - auto lk = lock(); - assertIsDownloaderUnlocked("write", lk); - assertNotDetachedUnlocked(lk); - } + if (!size) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Writing zero size is not allowed"); - const auto file_segment_path = getPath(); + { + auto lk = lock(); + 
assertIsDownloaderUnlocked("write", lk); + assertNotDetachedUnlocked(lk); + } - { if (download_state != State::DOWNLOADING) throw Exception( ErrorCodes::LOGICAL_ERROR, "Expected DOWNLOADING state, got {}", stateToString(download_state)); const size_t first_non_downloaded_offset = getCurrentWriteOffset(); - if (offset != first_non_downloaded_offset) + if (offset_in_file != first_non_downloaded_offset) throw Exception( ErrorCodes::LOGICAL_ERROR, "Attempt to write {} bytes to offset: {}, but current write offset is {}", - size, offset, first_non_downloaded_offset); + size, offset_in_file, first_non_downloaded_offset); const size_t current_downloaded_size = getDownloadedSize(); chassert(reserved_size >= current_downloaded_size); @@ -396,10 +392,10 @@ void FileSegment::write(char * from, size_t size, size_t offset) #endif if (!cache_writer) - cache_writer = std::make_unique(file_segment_path, /* buf_size */0); + cache_writer = std::make_unique(getPath(), /* buf_size */0); /// Size is equal to offset as offset for write buffer points to data end. - cache_writer->set(from, size, /* offset */size); + cache_writer->set(from, /* size */size, /* offset */size); /// Reset the buffer when finished. SCOPE_EXIT({ cache_writer->set(nullptr, 0); }); /// Flush the buffer. @@ -435,7 +431,6 @@ void FileSegment::write(char * from, size_t size, size_t offset) } throw; - } catch (Exception & e) { @@ -445,7 +440,7 @@ void FileSegment::write(char * from, size_t size, size_t offset) throw; } - chassert(getCurrentWriteOffset() == offset + size); + chassert(getCurrentWriteOffset() == offset_in_file + size); } FileSegment::State FileSegment::wait(size_t offset) @@ -828,7 +823,7 @@ bool FileSegment::assertCorrectnessUnlocked(const FileSegmentGuard::Lock & lock) }; const auto file_path = getPath(); - if (segment_kind != FileSegmentKind::Temporary) + { std::lock_guard lk(write_mutex); if (downloaded_size == 0) diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index f28482a1ce4..d6b37b60dc1 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -48,7 +48,7 @@ friend class FileCache; /// Because of reserved_size in tryReserve(). public: using Key = FileCacheKey; using RemoteFileReaderPtr = std::shared_ptr; - using LocalCacheWriterPtr = std::unique_ptr; + using LocalCacheWriterPtr = std::shared_ptr; using Downloader = std::string; using DownloaderId = std::string; using Priority = IFileCachePriority; @@ -204,7 +204,7 @@ public: bool reserve(size_t size_to_reserve, size_t lock_wait_timeout_milliseconds, FileCacheReserveStat * reserve_stat = nullptr); /// Write data into reserved space. - void write(char * from, size_t size, size_t offset); + void write(char * from, size_t size, size_t offset_in_file); // Invariant: if state() != DOWNLOADING and remote file reader is present, the reader's // available() == 0, and getFileOffsetOfBufferEnd() == our getCurrentWriteOffset(). @@ -212,6 +212,7 @@ public: // The reader typically requires its internal_buffer to be assigned from the outside before // calling next(). 
RemoteFileReaderPtr getRemoteFileReader(); + LocalCacheWriterPtr getLocalCacheWriter(); RemoteFileReaderPtr extractRemoteFileReader(); @@ -219,8 +220,6 @@ public: void setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_); - void setDownloadedSize(size_t delta); - void setDownloadFailed(); private: diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 5ed4ccdbeca..1d23278a255 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -944,14 +944,7 @@ KeyMetadata::iterator LockedKey::removeFileSegmentImpl( try { const auto path = key_metadata->getFileSegmentPath(*file_segment); - if (file_segment->segment_kind == FileSegmentKind::Temporary) - { - /// FIXME: For temporary file segment the requirement is not as strong because - /// the implementation of "temporary data in cache" creates files in advance. - if (fs::exists(path)) - fs::remove(path); - } - else if (file_segment->downloaded_size == 0) + if (file_segment->downloaded_size == 0) { chassert(!fs::exists(path)); } diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index a593ebfdab2..e654d091561 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -33,21 +34,20 @@ namespace } WriteBufferToFileSegment::WriteBufferToFileSegment(FileSegment * file_segment_) - : WriteBufferFromFileDecorator(std::make_unique(file_segment_->getPath())) + : WriteBufferFromFileBase(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0) , file_segment(file_segment_) , reserve_space_lock_wait_timeout_milliseconds(getCacheLockWaitTimeout()) { } WriteBufferToFileSegment::WriteBufferToFileSegment(FileSegmentsHolderPtr segment_holder_) - : WriteBufferFromFileDecorator( - segment_holder_->size() == 1 - ? std::make_unique(segment_holder_->front().getPath()) - : throw Exception(ErrorCodes::LOGICAL_ERROR, "WriteBufferToFileSegment can be created only from single segment")) + : WriteBufferFromFileBase(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0) , file_segment(&segment_holder_->front()) , segment_holder(std::move(segment_holder_)) , reserve_space_lock_wait_timeout_milliseconds(getCacheLockWaitTimeout()) { + if (segment_holder->size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "WriteBufferToFileSegment can be created only from single segment"); } /// If it throws an exception, the file segment will be incomplete, so you should not use it in the future. @@ -82,9 +82,6 @@ void WriteBufferToFileSegment::nextImpl() reserve_stat_msg += fmt::format("{} hold {}, can release {}; ", toString(kind), ReadableSize(stat.non_releasable_size), ReadableSize(stat.releasable_size)); - if (std::filesystem::exists(file_segment->getPath())) - std::filesystem::remove(file_segment->getPath()); - throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Failed to reserve {} bytes for {}: {}(segment info: {})", bytes_to_write, file_segment->getKind() == FileSegmentKind::Temporary ? "temporary file" : "the file in cache", @@ -95,17 +92,37 @@ void WriteBufferToFileSegment::nextImpl() try { - SwapHelper swap(*this, *impl); /// Write data to the underlying buffer. - impl->next(); + file_segment->write(working_buffer.begin(), bytes_to_write, written_bytes); + written_bytes += bytes_to_write; } catch (...) 
{ LOG_WARNING(getLogger("WriteBufferToFileSegment"), "Failed to write to the underlying buffer ({})", file_segment->getInfoForLog()); throw; } +} - file_segment->setDownloadedSize(bytes_to_write); +void WriteBufferToFileSegment::finalizeImpl() +{ + next(); + auto cache_writer = file_segment->getLocalCacheWriter(); + if (cache_writer) + { + SwapHelper swap(*this, *cache_writer); + cache_writer->finalize(); + } +} + +void WriteBufferToFileSegment::sync() +{ + next(); + auto cache_writer = file_segment->getLocalCacheWriter(); + if (cache_writer) + { + SwapHelper swap(*this, *cache_writer); + cache_writer->sync(); + } } std::unique_ptr WriteBufferToFileSegment::getReadBufferImpl() @@ -114,7 +131,10 @@ std::unique_ptr WriteBufferToFileSegment::getReadBufferImpl() * because in case destructor called without `getReadBufferImpl` called, data won't be read. */ finalize(); - return std::make_unique(file_segment->getPath()); + if (file_segment->getDownloadedSize() > 0) + return std::make_unique(file_segment->getPath()); + else + return std::make_unique(); } } diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.h b/src/Interpreters/Cache/WriteBufferToFileSegment.h index c4b0491f8c0..4719dd4be89 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.h +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.h @@ -9,7 +9,7 @@ namespace DB class FileSegment; -class WriteBufferToFileSegment : public WriteBufferFromFileDecorator, public IReadableWriteBuffer +class WriteBufferToFileSegment : public WriteBufferFromFileBase, public IReadableWriteBuffer { public: explicit WriteBufferToFileSegment(FileSegment * file_segment_); @@ -17,6 +17,13 @@ public: void nextImpl() override; + std::string getFileName() const override { return file_segment->getPath(); } + + void sync() override; + +protected: + void finalizeImpl() override; + private: std::unique_ptr getReadBufferImpl() override; @@ -29,6 +36,7 @@ private: FileSegmentsHolderPtr segment_holder; const size_t reserve_space_lock_wait_timeout_milliseconds; + size_t written_bytes = 0; }; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index f4433cd8288..b946c2cb21e 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -91,6 +91,7 @@ #include #include #include +#include #include #include #include @@ -281,6 +282,8 @@ struct ContextSharedPart : boost::noncopyable String default_profile_name; /// Default profile name used for default values. String system_profile_name; /// Profile used by system processes String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying + String merge_workload TSA_GUARDED_BY(mutex); /// Workload setting value that is used by all merges + String mutation_workload TSA_GUARDED_BY(mutex); /// Workload setting value that is used by all mutations std::unique_ptr access_control TSA_GUARDED_BY(mutex); mutable OnceFlag resource_manager_initialized; mutable ResourceManagerPtr resource_manager; @@ -365,6 +368,9 @@ struct ContextSharedPart : boost::noncopyable std::atomic_size_t max_view_num_to_warn = 10000lu; std::atomic_size_t max_dictionary_num_to_warn = 1000lu; std::atomic_size_t max_part_num_to_warn = 100000lu; + /// Only for system.server_settings, actually value stored in reloader itself + std::atomic_size_t config_reload_interval_ms = ConfigReloader::DEFAULT_RELOAD_INTERVAL.count(); + String format_schema_path; /// Path to a directory that contains schema files used by input formats. 
String google_protos_path; /// Path to a directory that contains the proto files for the well-known Protobuf types. mutable OnceFlag action_locks_manager_initialized; @@ -677,6 +683,9 @@ struct ContextSharedPart : boost::noncopyable } } + LOG_TRACE(log, "Shutting down AccessControl"); + access_control->shutdown(); + { std::lock_guard lock(mutex); @@ -833,6 +842,7 @@ ContextMutablePtr Context::createGlobal(ContextSharedPart * shared_part) auto res = std::shared_ptr(new Context); res->shared = shared_part; res->query_access_info = std::make_shared(); + res->query_privileges_info = std::make_shared(); return res; } @@ -1425,7 +1435,7 @@ void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id, void Context::checkAccess(const AccessRightsElement & element) const { checkAccessImpl(element); } void Context::checkAccess(const AccessRightsElements & elements) const { checkAccessImpl(elements); } -std::shared_ptr Context::getAccess() const +std::shared_ptr Context::getAccess() const { /// A helper function to collect parameters for calculating access rights, called with Context::getLocalSharedLock() acquired. auto get_params = [this]() @@ -1442,14 +1452,14 @@ std::shared_ptr Context::getAccess() const { SharedLockGuard lock(mutex); if (access && !need_recalculate_access) - return access; /// No need to recalculate access rights. + return std::make_shared(access, shared_from_this()); /// No need to recalculate access rights. params.emplace(get_params()); if (access && (access->getParams() == *params)) { need_recalculate_access = false; - return access; /// No need to recalculate access rights. + return std::make_shared(access, shared_from_this()); /// No need to recalculate access rights. } } @@ -1469,7 +1479,7 @@ std::shared_ptr Context::getAccess() const } } - return res; + return std::make_shared(res, shared_from_this()); } RowPolicyFilterPtr Context::getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const @@ -1561,11 +1571,36 @@ ResourceManagerPtr Context::getResourceManager() const ClassifierPtr Context::getWorkloadClassifier() const { std::lock_guard lock(mutex); + // NOTE: Workload cannot be changed after query start, and getWorkloadClassifier() should not be called before proper `workload` is set if (!classifier) classifier = getResourceManager()->acquire(getSettingsRef().workload); return classifier; } +String Context::getMergeWorkload() const +{ + SharedLockGuard lock(shared->mutex); + return shared->merge_workload; +} + +void Context::setMergeWorkload(const String & value) +{ + std::lock_guard lock(shared->mutex); + shared->merge_workload = value; +} + +String Context::getMutationWorkload() const +{ + SharedLockGuard lock(shared->mutex); + return shared->mutation_workload; +} + +void Context::setMutationWorkload(const String & value) +{ + std::lock_guard lock(shared->mutex); + shared->mutation_workload = value; +} + Scalars Context::getScalars() const { @@ -1830,6 +1865,15 @@ void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String } } +void Context::addQueryPrivilegesInfo(const String & privilege, bool granted) const +{ + std::lock_guard lock(query_privileges_info->mutex); + if (granted) + query_privileges_info->used_privileges.emplace(privilege); + else + query_privileges_info->missing_privileges.emplace(privilege); +} + static bool findIdentifier(const ASTFunction * function) { if (!function || !function->arguments) @@ -2072,7 +2116,7 @@ StoragePtr Context::executeTableFunction(const 
ASTPtr & table_expression, const } -StoragePtr Context::buildParametrizedViewStorage(const ASTPtr & table_expression, const String & database_name, const String & table_name) +StoragePtr Context::buildParametrizedViewStorage(const String & database_name, const String & table_name, const NameToNameMap & param_values) { if (table_name.empty()) return nullptr; @@ -2085,8 +2129,7 @@ StoragePtr Context::buildParametrizedViewStorage(const ASTPtr & table_expression return nullptr; auto query = original_view->getInMemoryMetadataPtr()->getSelectQuery().inner_query->clone(); - NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression, getQueryContext()); - StorageView::replaceQueryParametersIfParametrizedView(query, parameterized_view_values); + StorageView::replaceQueryParametersIfParametrizedView(query, param_values); ASTCreateQuery create; create.select = query->as(); @@ -2511,6 +2554,21 @@ void Context::makeQueryContext() local_read_query_throttler.reset(); local_write_query_throttler.reset(); backups_query_throttler.reset(); + query_privileges_info = std::make_shared(*query_privileges_info); +} + +void Context::makeQueryContextForMerge(const MergeTreeSettings & merge_tree_settings) +{ + makeQueryContext(); + classifier.reset(); // It is assumed that there are no active queries running using this classifier, otherwise this will lead to crashes + settings.workload = merge_tree_settings.merge_workload.value.empty() ? getMergeWorkload() : merge_tree_settings.merge_workload; +} + +void Context::makeQueryContextForMutate(const MergeTreeSettings & merge_tree_settings) +{ + makeQueryContext(); + classifier.reset(); // It is assumed that there are no active queries running using this classifier, otherwise this will lead to crashes + settings.workload = merge_tree_settings.mutation_workload.value.empty() ? getMutationWorkload() : merge_tree_settings.mutation_workload; } void Context::makeSessionContext() @@ -3344,8 +3402,6 @@ zkutil::ZooKeeperPtr Context::getZooKeeper() const const auto & config = shared->zookeeper_config ? 
*shared->zookeeper_config : getConfigRef(); if (!shared->zookeeper) shared->zookeeper = zkutil::ZooKeeper::create(config, zkutil::getZooKeeperConfigName(config), getZooKeeperLog()); - else if (shared->zookeeper->hasReachedDeadline()) - shared->zookeeper->finalize("ZooKeeper session has reached its deadline"); if (shared->zookeeper->expired()) { @@ -4077,7 +4133,7 @@ std::shared_ptr Context::getFilesystemCacheLog() const return shared->system_logs->filesystem_cache_log; } -std::shared_ptr Context::getS3QueueLog() const +std::shared_ptr Context::getS3QueueLog() const { SharedLockGuard lock(shared->mutex); if (!shared->system_logs) @@ -4086,6 +4142,15 @@ std::shared_ptr Context::getS3QueueLog() const return shared->system_logs->s3_queue_log; } +std::shared_ptr Context::getAzureQueueLog() const +{ + SharedLockGuard lock(shared->mutex); + if (!shared->system_logs) + return {}; + + return shared->system_logs->azure_queue_log; +} + std::shared_ptr Context::getFilesystemReadPrefetchesLog() const { SharedLockGuard lock(shared->mutex); @@ -4448,6 +4513,16 @@ void Context::checkPartitionCanBeDropped(const String & database, const String & checkCanBeDropped(database, table, partition_size, max_partition_size_to_drop); } +void Context::setConfigReloaderInterval(size_t value_ms) +{ + shared->config_reload_interval_ms.store(value_ms, std::memory_order_relaxed); +} + +size_t Context::getConfigReloaderInterval() const +{ + return shared->config_reload_interval_ms.load(std::memory_order_relaxed); +} + InputFormatPtr Context::getInputFormat(const String & name, ReadBuffer & buf, const Block & sample, UInt64 max_block_size, const std::optional & format_settings, std::optional max_parsing_threads) const { return FormatFactory::instance().getInput(name, buf, sample, shared_from_this(), max_block_size, format_settings, max_parsing_threads); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 7c7b2e4ea64..f9b91a45978 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -50,6 +50,7 @@ class ASTSelectQuery; struct ContextSharedPart; class ContextAccess; +class ContextAccessWrapper; struct User; using UserPtr = std::shared_ptr; struct SettingsProfilesInfo; @@ -106,7 +107,7 @@ class TransactionsInfoLog; class ProcessorsProfileLog; class FilesystemCacheLog; class FilesystemReadPrefetchesLog; -class S3QueueLog; +class ObjectStorageQueueLog; class AsynchronousInsertLog; class BackupLog; class BlobStorageLog; @@ -403,9 +404,31 @@ public: mutable std::mutex mutex; }; + struct QueryPrivilegesInfo + { + QueryPrivilegesInfo() = default; + + QueryPrivilegesInfo(const QueryPrivilegesInfo & rhs) + { + std::lock_guard lock(rhs.mutex); + used_privileges = rhs.used_privileges; + missing_privileges = rhs.missing_privileges; + } + + QueryPrivilegesInfo(QueryPrivilegesInfo && rhs) = delete; + + std::unordered_set used_privileges TSA_GUARDED_BY(mutex); + std::unordered_set missing_privileges TSA_GUARDED_BY(mutex); + + mutable std::mutex mutex; + }; + + using QueryPrivilegesInfoPtr = std::shared_ptr; + protected: /// Needs to be changed while having const context in factories methods mutable QueryFactoriesInfo query_factories_info; + QueryPrivilegesInfoPtr query_privileges_info; /// Query metrics for reading data asynchronously with IAsynchronousReader. 
mutable std::shared_ptr async_read_counters; @@ -612,7 +635,7 @@ public: void checkAccess(const AccessRightsElement & element) const; void checkAccess(const AccessRightsElements & elements) const; - std::shared_ptr getAccess() const; + std::shared_ptr getAccess() const; RowPolicyFilterPtr getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const; @@ -622,6 +645,10 @@ public: /// Resource management related ResourceManagerPtr getResourceManager() const; ClassifierPtr getWorkloadClassifier() const; + String getMergeWorkload() const; + void setMergeWorkload(const String & value); + String getMutationWorkload() const; + void setMutationWorkload(const String & value); /// We have to copy external tables inside executeQuery() to track limits. Therefore, set callback for it. Must set once. void setExternalTablesInitializer(ExternalTablesInitializer && initializer); @@ -737,13 +764,17 @@ public: QueryFactoriesInfo getQueryFactoriesInfo() const; void addQueryFactoriesInfo(QueryLogFactories factory_type, const String & created_object) const; + const QueryPrivilegesInfo & getQueryPrivilegesInfo() const { return *getQueryPrivilegesInfoPtr(); } + QueryPrivilegesInfoPtr getQueryPrivilegesInfoPtr() const { return query_privileges_info; } + void addQueryPrivilegesInfo(const String & privilege, bool granted) const; + /// For table functions s3/file/url/hdfs/input we can use structure from /// insertion table depending on select expression. StoragePtr executeTableFunction(const ASTPtr & table_expression, const ASTSelectQuery * select_query_hint = nullptr); /// Overload for the new analyzer. Structure inference is performed in QueryAnalysisPass. StoragePtr executeTableFunction(const ASTPtr & table_expression, const TableFunctionPtr & table_function_ptr); - StoragePtr buildParametrizedViewStorage(const ASTPtr & table_expression, const String & database_name, const String & table_name); + StoragePtr buildParametrizedViewStorage(const String & database_name, const String & table_name, const NameToNameMap & param_values); void addViewSource(const StoragePtr & storage); StoragePtr getViewSource() const; @@ -907,6 +938,8 @@ public: void setSessionContext(ContextMutablePtr context_) { session_context = context_; } void makeQueryContext(); + void makeQueryContextForMerge(const MergeTreeSettings & merge_tree_settings); + void makeQueryContextForMutate(const MergeTreeSettings & merge_tree_settings); void makeSessionContext(); void makeGlobalContext(); @@ -1100,7 +1133,8 @@ public: std::shared_ptr getTransactionsInfoLog() const; std::shared_ptr getProcessorsProfileLog() const; std::shared_ptr getFilesystemCacheLog() const; - std::shared_ptr getS3QueueLog() const; + std::shared_ptr getS3QueueLog() const; + std::shared_ptr getAzureQueueLog() const; std::shared_ptr getFilesystemReadPrefetchesLog() const; std::shared_ptr getAsynchronousInsertLog() const; std::shared_ptr getBackupLog() const; @@ -1128,6 +1162,9 @@ public: size_t getMaxPartitionSizeToDrop() const; void checkPartitionCanBeDropped(const String & database, const String & table, const size_t & partition_size) const; void checkPartitionCanBeDropped(const String & database, const String & table, const size_t & partition_size, const size_t & max_partition_size_to_drop) const; + /// Only for system.server_settings, actual value is stored in ConfigReloader + void setConfigReloaderInterval(size_t value_ms); + size_t getConfigReloaderInterval() const; /// Lets you select the compression codec according to the 
conditions described in the configuration file. std::shared_ptr chooseCompressionCodec(size_t part_size, double part_size_ratio) const; diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 0f4c8cc26a6..aaec94a4fb0 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -63,6 +63,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int HAVE_DEPENDENT_OBJECTS; extern const int UNFINISHED; + extern const int INFINITE_LOOP; } class DatabaseNameHints : public IHints<> @@ -1473,6 +1474,114 @@ void DatabaseCatalog::checkTableCanBeRemovedOrRenamedUnlocked( removing_table, fmt::join(from_other_databases, ", ")); } +void DatabaseCatalog::checkTableCanBeAddedWithNoCyclicDependencies( + const QualifiedTableName & table_name, + const TableNamesSet & new_referential_dependencies, + const TableNamesSet & new_loading_dependencies) +{ + std::lock_guard lock{databases_mutex}; + + StorageID table_id = StorageID{table_name}; + + auto check = [&](TablesDependencyGraph & dependencies, const TableNamesSet & new_dependencies) + { + auto old_dependencies = dependencies.removeDependencies(table_id); + dependencies.addDependencies(table_name, new_dependencies); + auto restore_dependencies = [&]() + { + dependencies.removeDependencies(table_id); + if (!old_dependencies.empty()) + dependencies.addDependencies(table_id, old_dependencies); + }; + + if (dependencies.hasCyclicDependencies()) + { + auto cyclic_dependencies_description = dependencies.describeCyclicDependencies(); + restore_dependencies(); + throw Exception( + ErrorCodes::INFINITE_LOOP, + "Cannot add dependencies for '{}', because it will lead to cyclic dependencies: {}", + table_name.getFullName(), + cyclic_dependencies_description); + } + + restore_dependencies(); + }; + + check(referential_dependencies, new_referential_dependencies); + check(loading_dependencies, new_loading_dependencies); +} + +void DatabaseCatalog::checkTableCanBeRenamedWithNoCyclicDependencies(const StorageID & from_table_id, const StorageID & to_table_id) +{ + std::lock_guard lock{databases_mutex}; + + auto check = [&](TablesDependencyGraph & dependencies) + { + auto old_dependencies = dependencies.removeDependencies(from_table_id); + dependencies.addDependencies(to_table_id, old_dependencies); + auto restore_dependencies = [&]() + { + dependencies.removeDependencies(to_table_id); + dependencies.addDependencies(from_table_id, old_dependencies); + }; + + if (dependencies.hasCyclicDependencies()) + { + auto cyclic_dependencies_description = dependencies.describeCyclicDependencies(); + restore_dependencies(); + throw Exception( + ErrorCodes::INFINITE_LOOP, + "Cannot rename '{}' to '{}', because it will lead to cyclic dependencies: {}", + from_table_id.getFullTableName(), + to_table_id.getFullTableName(), + cyclic_dependencies_description); + } + + restore_dependencies(); + }; + + check(referential_dependencies); + check(loading_dependencies); +} + +void DatabaseCatalog::checkTablesCanBeExchangedWithNoCyclicDependencies(const StorageID & table_id_1, const StorageID & table_id_2) +{ + std::lock_guard lock{databases_mutex}; + + auto check = [&](TablesDependencyGraph & dependencies) + { + auto old_dependencies_1 = dependencies.removeDependencies(table_id_1); + auto old_dependencies_2 = dependencies.removeDependencies(table_id_2); + dependencies.addDependencies(table_id_1, old_dependencies_2); + dependencies.addDependencies(table_id_2, old_dependencies_1); + auto restore_dependencies = [&]() + 
{ + dependencies.removeDependencies(table_id_1); + dependencies.removeDependencies(table_id_2); + dependencies.addDependencies(table_id_1, old_dependencies_1); + dependencies.addDependencies(table_id_2, old_dependencies_2); + }; + + if (dependencies.hasCyclicDependencies()) + { + auto cyclic_dependencies_description = dependencies.describeCyclicDependencies(); + restore_dependencies(); + throw Exception( + ErrorCodes::INFINITE_LOOP, + "Cannot exchange '{}' and '{}', because it will lead to cyclic dependencies: {}", + table_id_1.getFullTableName(), + table_id_2.getFullTableName(), + cyclic_dependencies_description); + } + + restore_dependencies(); + }; + + check(referential_dependencies); + check(loading_dependencies); +} + void DatabaseCatalog::cleanupStoreDirectoryTask() { for (const auto & [disk_name, disk] : getContext()->getDisksMap()) diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 37125d9900c..17d34e96245 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -129,6 +129,7 @@ public: static constexpr const char * SYSTEM_DATABASE = "system"; static constexpr const char * INFORMATION_SCHEMA = "information_schema"; static constexpr const char * INFORMATION_SCHEMA_UPPERCASE = "INFORMATION_SCHEMA"; + static constexpr const char * DEFAULT_DATABASE = "default"; /// Returns true if a passed name is one of the predefined databases' names. static bool isPredefinedDatabase(std::string_view database_name); @@ -244,6 +245,9 @@ public: void checkTableCanBeRemovedOrRenamed(const StorageID & table_id, bool check_referential_dependencies, bool check_loading_dependencies, bool is_drop_database = false) const; + void checkTableCanBeAddedWithNoCyclicDependencies(const QualifiedTableName & table_name, const TableNamesSet & new_referential_dependencies, const TableNamesSet & new_loading_dependencies); + void checkTableCanBeRenamedWithNoCyclicDependencies(const StorageID & from_table_id, const StorageID & to_table_id); + void checkTablesCanBeExchangedWithNoCyclicDependencies(const StorageID & table_id_1, const StorageID & table_id_2); struct TableMarkedAsDropped { diff --git a/src/Interpreters/ErrorLog.cpp b/src/Interpreters/ErrorLog.cpp new file mode 100644 index 00000000000..42616f13e24 --- /dev/null +++ b/src/Interpreters/ErrorLog.cpp @@ -0,0 +1,123 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +ColumnsDescription ErrorLogElement::getColumnsDescription() +{ + ParserCodec codec_parser; + return ColumnsDescription { + { + "hostname", + std::make_shared(std::make_shared()), + parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), + "Hostname of the server executing the query." + }, + { + "event_date", + std::make_shared(), + parseQuery(codec_parser, "(Delta(2), ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), + "Event date." + }, + { + "event_time", + std::make_shared(), + parseQuery(codec_parser, "(Delta(4), ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), + "Event time." + }, + { + "code", + std::make_shared(), + parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), + "Error code." 
+ }, + { + "error", + std::make_shared(std::make_shared()), + parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), + "Error name." + }, + { + "value", + std::make_shared(), + parseQuery(codec_parser, "(ZSTD(3))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), + "Number of errors happened in time interval." + }, + { + "remote", + std::make_shared(), + parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), + "Remote exception (i.e. received during one of the distributed queries)." + } + }; +} + +void ErrorLogElement::appendToBlock(MutableColumns & columns) const +{ + size_t column_idx = 0; + + columns[column_idx++]->insert(getFQDNOrHostName()); + columns[column_idx++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); + columns[column_idx++]->insert(event_time); + columns[column_idx++]->insert(code); + columns[column_idx++]->insert(ErrorCodes::getName(code)); + columns[column_idx++]->insert(value); + columns[column_idx++]->insert(remote); +} + +struct ValuePair +{ + UInt64 local = 0; + UInt64 remote = 0; +}; + +void ErrorLog::stepFunction(TimePoint current_time) +{ + /// Static lazy initialization to avoid polluting the header with implementation details + static std::vector previous_values(ErrorCodes::end()); + + auto event_time = std::chrono::system_clock::to_time_t(current_time); + + for (ErrorCodes::ErrorCode code = 0, end = ErrorCodes::end(); code < end; ++code) + { + const auto & error = ErrorCodes::values[code].get(); + if (error.local.count != previous_values.at(code).local) + { + ErrorLogElement local_elem { + .event_time=event_time, + .code=code, + .value=error.local.count - previous_values.at(code).local, + .remote=false + }; + this->add(std::move(local_elem)); + previous_values[code].local = error.local.count; + } + if (error.remote.count != previous_values.at(code).remote) + { + ErrorLogElement remote_elem { + .event_time=event_time, + .code=code, + .value=error.remote.count - previous_values.at(code).remote, + .remote=true + }; + this->add(std::move(remote_elem)); + previous_values[code].remote = error.remote.count; + } + } +} + +} diff --git a/src/Interpreters/ErrorLog.h b/src/Interpreters/ErrorLog.h new file mode 100644 index 00000000000..4afe334d4de --- /dev/null +++ b/src/Interpreters/ErrorLog.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +/** ErrorLog is a log of error values measured at regular time interval. 
+ */ + +struct ErrorLogElement +{ + time_t event_time{}; + ErrorCodes::ErrorCode code{}; + ErrorCodes::Value value{}; + bool remote{}; + + static std::string name() { return "ErrorLog"; } + static ColumnsDescription getColumnsDescription(); + static NamesAndAliases getNamesAndAliases() { return {}; } + void appendToBlock(MutableColumns & columns) const; +}; + + +class ErrorLog : public PeriodicLog<ErrorLogElement> +{ + using PeriodicLog<ErrorLogElement>::PeriodicLog; + +protected: + void stepFunction(TimePoint current_time) override; +}; + +} diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 04f29f35c3c..d832f568cb8 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -49,8 +49,9 @@ namespace ErrorCodes static std::unordered_set<const ActionsDAG::Node *> processShortCircuitFunctions(const ActionsDAG & actions_dag, ShortCircuitFunctionEvaluation short_circuit_function_evaluation); -ExpressionActions::ExpressionActions(ActionsDAGPtr actions_dag_, const ExpressionActionsSettings & settings_) - : settings(settings_) +ExpressionActions::ExpressionActions(ActionsDAGPtr actions_dag_, const ExpressionActionsSettings & settings_, bool project_inputs_) + : project_inputs(project_inputs_) + , settings(settings_) { actions_dag = actions_dag_->clone(); @@ -194,6 +195,10 @@ static void setLazyExecutionInfo( } lazy_execution_info.short_circuit_ancestors_info[parent].insert(indexes.begin(), indexes.end()); + /// After checking arguments_with_disabled_lazy_execution, if there is no relation with parent, + /// disable the current node. + if (indexes.empty()) + lazy_execution_info.can_be_lazy_executed = false; } else /// If lazy execution is disabled for one of parents, we should disable it for current node. @@ -291,9 +296,9 @@ static std::unordered_set<const ActionsDAG::Node *> processShortCircuitFunctions /// Firstly, find all short-circuit functions and get their settings.
std::unordered_map short_circuit_nodes; - IFunctionBase::ShortCircuitSettings short_circuit_settings; for (const auto & node : nodes) { + IFunctionBase::ShortCircuitSettings short_circuit_settings; if (node.type == ActionsDAG::ActionType::FUNCTION && node.function_base->isShortCircuit(short_circuit_settings, node.children.size()) && !node.children.empty()) short_circuit_nodes[&node] = short_circuit_settings; } @@ -757,7 +762,7 @@ void ExpressionActions::execute(Block & block, size_t & num_rows, bool dry_run, } } - if (actions_dag->isInputProjected()) + if (project_inputs) { block.clear(); } @@ -862,7 +867,7 @@ std::string ExpressionActions::dumpActions() const for (const auto & output_column : output_columns) ss << output_column.name << " " << output_column.type->getName() << "\n"; - ss << "\nproject input: " << actions_dag->isInputProjected() << "\noutput positions:"; + ss << "\noutput positions:"; for (auto pos : result_positions) ss << " " << pos; ss << "\n"; @@ -926,7 +931,6 @@ JSONBuilder::ItemPtr ExpressionActions::toTree() const map->add("Actions", std::move(actions_array)); map->add("Outputs", std::move(outputs_array)); map->add("Positions", std::move(positions_array)); - map->add("Project Input", actions_dag->isInputProjected()); return map; } @@ -980,7 +984,7 @@ void ExpressionActionsChain::addStep(NameSet non_constant_inputs) if (column.column && isColumnConst(*column.column) && non_constant_inputs.contains(column.name)) column.column = nullptr; - steps.push_back(std::make_unique(std::make_shared(columns))); + steps.push_back(std::make_unique(std::make_shared(ActionsDAG(columns), false))); } void ExpressionActionsChain::finalize() @@ -1129,14 +1133,14 @@ void ExpressionActionsChain::JoinStep::finalize(const NameSet & required_output_ std::swap(result_columns, new_result_columns); } -ActionsDAGPtr & ExpressionActionsChain::Step::actions() +ActionsAndProjectInputsFlagPtr & ExpressionActionsChain::Step::actions() { - return typeid_cast(*this).actions_dag; + return typeid_cast(*this).actions_and_flags; } -const ActionsDAGPtr & ExpressionActionsChain::Step::actions() const +const ActionsAndProjectInputsFlagPtr & ExpressionActionsChain::Step::actions() const { - return typeid_cast(*this).actions_dag; + return typeid_cast(*this).actions_and_flags; } } diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index cb467004d29..ddffe022215 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -79,11 +79,13 @@ private: ColumnNumbers result_positions; Block sample_block; + bool project_inputs = false; + ExpressionActionsSettings settings; public: ExpressionActions() = delete; - explicit ExpressionActions(ActionsDAGPtr actions_dag_, const ExpressionActionsSettings & settings_ = {}); + explicit ExpressionActions(ActionsDAGPtr actions_dag_, const ExpressionActionsSettings & settings_ = {}, bool project_inputs_ = false); ExpressionActions(const ExpressionActions &) = default; ExpressionActions & operator=(const ExpressionActions &) = default; @@ -173,48 +175,49 @@ struct ExpressionActionsChain : WithContext /// Remove unused result and update required columns virtual void finalize(const NameSet & required_output_) = 0; /// Add projections to expression - virtual void prependProjectInput() const = 0; + virtual void prependProjectInput() = 0; virtual std::string dump() const = 0; /// Only for ExpressionActionsStep - ActionsDAGPtr & actions(); - const ActionsDAGPtr & actions() const; + ActionsAndProjectInputsFlagPtr & 
actions(); + const ActionsAndProjectInputsFlagPtr & actions() const; }; struct ExpressionActionsStep : public Step { - ActionsDAGPtr actions_dag; + ActionsAndProjectInputsFlagPtr actions_and_flags; + bool is_final_projection = false; - explicit ExpressionActionsStep(ActionsDAGPtr actions_dag_, Names required_output_ = Names()) + explicit ExpressionActionsStep(ActionsAndProjectInputsFlagPtr actions_and_flags_, Names required_output_ = Names()) : Step(std::move(required_output_)) - , actions_dag(std::move(actions_dag_)) + , actions_and_flags(std::move(actions_and_flags_)) { } NamesAndTypesList getRequiredColumns() const override { - return actions_dag->getRequiredColumns(); + return actions_and_flags->dag.getRequiredColumns(); } ColumnsWithTypeAndName getResultColumns() const override { - return actions_dag->getResultColumns(); + return actions_and_flags->dag.getResultColumns(); } void finalize(const NameSet & required_output_) override { - if (!actions_dag->isOutputProjected()) - actions_dag->removeUnusedActions(required_output_); + if (!is_final_projection) + actions_and_flags->dag.removeUnusedActions(required_output_); } - void prependProjectInput() const override + void prependProjectInput() override { - actions_dag->projectInput(); + actions_and_flags->project_input = true; } std::string dump() const override { - return actions_dag->dumpDAG(); + return actions_and_flags->dag.dumpDAG(); } }; @@ -229,7 +232,7 @@ struct ExpressionActionsChain : WithContext NamesAndTypesList getRequiredColumns() const override { return required_columns; } ColumnsWithTypeAndName getResultColumns() const override { return result_columns; } void finalize(const NameSet & required_output_) override; - void prependProjectInput() const override {} /// TODO: remove unused columns before ARRAY JOIN ? + void prependProjectInput() override {} /// TODO: remove unused columns before ARRAY JOIN ? std::string dump() const override { return "ARRAY JOIN"; } }; @@ -245,7 +248,7 @@ struct ExpressionActionsChain : WithContext NamesAndTypesList getRequiredColumns() const override { return required_columns; } ColumnsWithTypeAndName getResultColumns() const override { return result_columns; } void finalize(const NameSet & required_output_) override; - void prependProjectInput() const override {} /// TODO: remove unused columns before JOIN ? + void prependProjectInput() override {} /// TODO: remove unused columns before JOIN ?
std::string dump() const override { return "JOIN"; } }; @@ -263,7 +266,7 @@ struct ExpressionActionsChain : WithContext steps.clear(); } - ActionsDAGPtr getLastActions(bool allow_empty = false) + ExpressionActionsStep * getLastExpressionStep(bool allow_empty = false) { if (steps.empty()) { @@ -272,7 +275,15 @@ struct ExpressionActionsChain : WithContext throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty ExpressionActionsChain"); } - return typeid_cast(steps.back().get())->actions_dag; + return typeid_cast(steps.back().get()); + } + + ActionsAndProjectInputsFlagPtr getLastActions(bool allow_empty = false) + { + if (auto * step = getLastExpressionStep(allow_empty)) + return step->actions_and_flags; + + return nullptr; } Step & getLastStep() @@ -286,10 +297,15 @@ struct ExpressionActionsChain : WithContext Step & lastStep(const NamesAndTypesList & columns) { if (steps.empty()) - steps.emplace_back(std::make_unique(std::make_shared(columns))); + return addStep(columns); return *steps.back(); } + Step & addStep(const NamesAndTypesList & columns) + { + return *steps.emplace_back(std::make_unique(std::make_shared(ActionsDAG(columns), false))); + } + std::string dumpChain() const; }; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index d80d5cd5b93..62cddd9caf7 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -186,7 +186,7 @@ ExpressionAnalyzer::ExpressionAnalyzer( /// Replaces global subqueries with the generated names of temporary tables that will be sent to remote servers. initGlobalSubqueriesAndExternalTables(do_global, is_explain); - auto temp_actions = std::make_shared(sourceColumns()); + ActionsDAG temp_actions(sourceColumns()); columns_after_array_join = getColumnsAfterArrayJoin(temp_actions, sourceColumns()); columns_after_join = analyzeJoin(temp_actions, columns_after_array_join); /// has_aggregation, aggregation_keys, aggregate_descriptions, aggregated_columns. 
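// Note: the ActionsAndProjectInputsFlag wrapper that the chain steps above now carry
// is not defined anywhere in this excerpt. A minimal sketch, assuming the member
// layout implied by the `->dag` and `->project_input` accesses in these hunks:
struct ActionsAndProjectInputsFlag
{
    ActionsDAG dag;              /// the expression graph itself
    bool project_input = false;  /// replaces ActionsDAG::projectInput(); consumed via the new project_inputs_ constructor argument of ExpressionActions
};
using ActionsAndProjectInputsFlagPtr = std::shared_ptr<ActionsAndProjectInputsFlag>;
// Keeping the flag next to the DAG rather than inside it defers the
// "clear unread input columns" decision to ExpressionActions::execute() time.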
@@ -199,7 +199,7 @@ ExpressionAnalyzer::ExpressionAnalyzer( analyzeAggregation(temp_actions); } -NamesAndTypesList ExpressionAnalyzer::getColumnsAfterArrayJoin(ActionsDAGPtr & actions, const NamesAndTypesList & src_columns) +NamesAndTypesList ExpressionAnalyzer::getColumnsAfterArrayJoin(ActionsDAG & actions, const NamesAndTypesList & src_columns) { const auto * select_query = query->as(); if (!select_query) @@ -213,14 +213,14 @@ NamesAndTypesList ExpressionAnalyzer::getColumnsAfterArrayJoin(ActionsDAGPtr & a getRootActionsNoMakeSet(array_join_expression_list, actions, false); auto array_join = addMultipleArrayJoinAction(actions, is_array_join_left); - auto sample_columns = actions->getResultColumns(); + auto sample_columns = actions.getResultColumns(); array_join->prepare(sample_columns); - actions = std::make_shared(sample_columns); + actions = ActionsDAG(sample_columns); NamesAndTypesList new_columns_after_array_join; NameSet added_columns; - for (auto & column : actions->getResultColumns()) + for (auto & column : actions.getResultColumns()) { if (syntax->array_join_result_to_source.contains(column.name)) { @@ -236,7 +236,7 @@ NamesAndTypesList ExpressionAnalyzer::getColumnsAfterArrayJoin(ActionsDAGPtr & a return new_columns_after_array_join; } -NamesAndTypesList ExpressionAnalyzer::analyzeJoin(ActionsDAGPtr & actions, const NamesAndTypesList & src_columns) +NamesAndTypesList ExpressionAnalyzer::analyzeJoin(ActionsDAG & actions, const NamesAndTypesList & src_columns) { const auto * select_query = query->as(); if (!select_query) @@ -246,9 +246,9 @@ NamesAndTypesList ExpressionAnalyzer::analyzeJoin(ActionsDAGPtr & actions, const if (join) { getRootActionsNoMakeSet(analyzedJoin().leftKeysList(), actions, false); - auto sample_columns = actions->getNamesAndTypesList(); + auto sample_columns = actions.getNamesAndTypesList(); syntax->analyzed_join->addJoinedColumnsAndCorrectTypes(sample_columns, true); - actions = std::make_shared(sample_columns); + actions = ActionsDAG(sample_columns); } NamesAndTypesList result_columns = src_columns; @@ -256,7 +256,7 @@ NamesAndTypesList ExpressionAnalyzer::analyzeJoin(ActionsDAGPtr & actions, const return result_columns; } -void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) +void ExpressionAnalyzer::analyzeAggregation(ActionsDAG & temp_actions) { /** Find aggregation keys (aggregation_keys), information about aggregate functions (aggregate_descriptions), * as well as a set of columns obtained after the aggregation, if any, @@ -272,7 +272,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) if (!has_aggregation) { - aggregated_columns = temp_actions->getNamesAndTypesList(); + aggregated_columns = temp_actions.getNamesAndTypesList(); return; } @@ -321,7 +321,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) ssize_t group_size = group_elements_ast.size(); const auto & column_name = group_elements_ast[j]->getColumnName(); - const auto * node = temp_actions->tryFindInOutputs(column_name); + const auto * node = temp_actions.tryFindInOutputs(column_name); if (!node) throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier (in GROUP BY): {}", column_name); @@ -375,7 +375,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) getRootActionsNoMakeSet(group_asts[i], temp_actions, false); const auto & column_name = group_asts[i]->getColumnName(); - const auto * node = temp_actions->tryFindInOutputs(column_name); + const auto * node = 
temp_actions.tryFindInOutputs(column_name); if (!node) throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier (in GROUP BY): {}", column_name); @@ -434,7 +434,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) has_const_aggregation_keys = select_query->group_by_with_constant_keys; } else - aggregated_columns = temp_actions->getNamesAndTypesList(); + aggregated_columns = temp_actions.getNamesAndTypesList(); for (const auto & desc : aggregate_descriptions) aggregated_columns.emplace_back(desc.column_name, desc.function->getResultType()); @@ -465,7 +465,7 @@ SetPtr ExpressionAnalyzer::isPlainStorageSetInSubquery(const ASTPtr & subquery_o return storage_set->getSet(); } -void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts) +void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAG & actions, bool only_consts) { LogAST log; ActionsVisitor::Data visitor_data( @@ -485,7 +485,7 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ actions = visitor_data.getActions(); } -void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGPtr & actions, bool only_consts) +void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAG & actions, bool only_consts) { LogAST log; ActionsVisitor::Data visitor_data( @@ -507,7 +507,7 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP void ExpressionAnalyzer::getRootActionsForHaving( - const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts) + const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAG & actions, bool only_consts) { LogAST log; ActionsVisitor::Data visitor_data( @@ -528,7 +528,7 @@ void ExpressionAnalyzer::getRootActionsForHaving( } -void ExpressionAnalyzer::getRootActionsForWindowFunctions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions) +void ExpressionAnalyzer::getRootActionsForWindowFunctions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAG & actions) { LogAST log; ActionsVisitor::Data visitor_data( @@ -548,7 +548,7 @@ void ExpressionAnalyzer::getRootActionsForWindowFunctions(const ASTPtr & ast, bo } -void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, AggregateDescriptions & descriptions) +void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAG & actions, AggregateDescriptions & descriptions) { for (const ASTPtr & ast : aggregates()) { @@ -567,7 +567,7 @@ void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, Aggr for (size_t i = 0; i < arguments.size(); ++i) { const std::string & name = arguments[i]->getColumnName(); - const auto * dag_node = actions->tryFindInOutputs(name); + const auto * dag_node = actions.tryFindInOutputs(name); if (!dag_node) { throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, @@ -659,7 +659,7 @@ void ExpressionAnalyzer::makeWindowDescriptionFromAST(const Context & context_, 1 /* nulls_direction */)); auto actions_dag = std::make_shared(aggregated_columns); - getRootActions(column_ast, false, actions_dag); + getRootActions(column_ast, false, *actions_dag); desc.partition_by_actions.push_back(std::move(actions_dag)); } } @@ -680,7 +680,7 @@ void ExpressionAnalyzer::makeWindowDescriptionFromAST(const Context & context_, order_by_element.nulls_direction)); auto actions_dag = std::make_shared(aggregated_columns); - 
getRootActions(column_ast, false, actions_dag); + getRootActions(column_ast, false, *actions_dag); desc.order_by_actions.push_back(std::move(actions_dag)); } } @@ -720,7 +720,7 @@ void ExpressionAnalyzer::makeWindowDescriptionFromAST(const Context & context_, } } -void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions) +void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAG & actions) { auto current_context = getContext(); @@ -737,13 +737,13 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions) desc, elem.definition.get()); auto [it, inserted] = window_descriptions.insert( - {desc.window_name, desc}); + {elem.name, std::move(desc)}); if (!inserted) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window '{}' is defined twice in the WINDOW clause", - desc.window_name); + elem.name); } } } @@ -776,7 +776,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions) for (size_t i = 0; i < arguments.size(); ++i) { const std::string & name = arguments[i]->getColumnName(); - const auto * node = actions->tryFindInOutputs(name); + const auto * node = actions.tryFindInOutputs(name); if (!node) { @@ -817,13 +817,14 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions) { const auto & definition = function_node.window_definition->as< const ASTWindowDefinition &>(); + auto default_window_name = definition.getDefaultWindowName(); WindowDescription desc; - desc.window_name = definition.getDefaultWindowName(); + desc.window_name = default_window_name; makeWindowDescriptionFromAST(*current_context, window_descriptions, desc, &definition); auto [it, inserted] = window_descriptions.insert( - {desc.window_name, desc}); + {default_window_name, desc}); if (!inserted) { @@ -871,7 +872,7 @@ const ASTSelectQuery * SelectQueryExpressionAnalyzer::getAggregatingQuery() cons } /// "Big" ARRAY JOIN. -ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool array_join_is_left) const +ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ActionsDAG & actions, bool array_join_is_left) const { NameSet result_columns; for (const auto & result_source : syntax->array_join_result_to_source) @@ -879,8 +880,8 @@ ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ActionsDAGPtr /// Assign new names to columns, if needed. if (result_source.first != result_source.second) { - const auto & node = actions->findInOutputs(result_source.second); - actions->getOutputs().push_back(&actions->addAlias(node, result_source.first)); + const auto & node = actions.findInOutputs(result_source.second); + actions.getOutputs().push_back(&actions.addAlias(node, result_source.first)); } /// Make ARRAY JOIN (replace arrays with their insides) for the columns in these new names. 
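// Usage sketch of the alias pattern in addMultipleArrayJoinAction above: given a
// DAG that already exposes an output named "src", publish the same node again
// under "dst" without dropping the original ("src"/"dst" are illustrative names).
void addRenamedOutput(ActionsDAG & dag)
{
    const auto & node = dag.findInOutputs("src");           /// throws if "src" is not an output
    dag.getOutputs().push_back(&dag.addAlias(node, "dst")); /// same node, published under a second name
}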
@@ -890,7 +891,7 @@ ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ActionsDAGPtr return std::make_shared(result_columns, array_join_is_left, getContext()); } -ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, ActionsDAGPtr & before_array_join, bool only_types) +ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, ActionsAndProjectInputsFlagPtr & before_array_join, bool only_types) { const auto * select_query = getSelectQuery(); @@ -900,9 +901,9 @@ ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActi ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns()); - getRootActions(array_join_expression_list, only_types, step.actions()); + getRootActions(array_join_expression_list, only_types, step.actions()->dag); - auto array_join = addMultipleArrayJoinAction(step.actions(), is_array_join_left); + auto array_join = addMultipleArrayJoinAction(step.actions()->dag, is_array_join_left); before_array_join = chain.getLastActions(); chain.steps.push_back(std::make_unique(array_join, step.getResultColumns())); @@ -916,20 +917,23 @@ bool SelectQueryExpressionAnalyzer::appendJoinLeftKeys(ExpressionActionsChain & { ExpressionActionsChain::Step & step = chain.lastStep(columns_after_array_join); - getRootActions(analyzedJoin().leftKeysList(), only_types, step.actions()); + getRootActions(analyzedJoin().leftKeysList(), only_types, step.actions()->dag); return true; } JoinPtr SelectQueryExpressionAnalyzer::appendJoin( ExpressionActionsChain & chain, - ActionsDAGPtr & converting_join_columns) + ActionsAndProjectInputsFlagPtr & converting_join_columns) { const ColumnsWithTypeAndName & left_sample_columns = chain.getLastStep().getResultColumns(); - JoinPtr join = makeJoin(*syntax->ast_join, left_sample_columns, converting_join_columns); + ActionsDAGPtr converting_actions; + JoinPtr join = makeJoin(*syntax->ast_join, left_sample_columns, converting_actions); - if (converting_join_columns) + if (converting_actions) { + converting_join_columns = std::make_shared(); + converting_join_columns->dag = std::move(*converting_actions); chain.steps.push_back(std::make_unique(converting_join_columns)); chain.addStep(); } @@ -1065,7 +1069,7 @@ static std::unique_ptr buildJoinedPlan( rename_dag->getOutputs()[pos] = &alias; } } - rename_dag->projectInput(); + rename_dag->appendInputsForUnusedColumns(joined_plan->getCurrentDataStream().header); auto rename_step = std::make_unique(joined_plan->getCurrentDataStream(), std::move(rename_dag)); rename_step->setStepDescription("Rename joined columns"); joined_plan->addStep(std::move(rename_step)); @@ -1166,45 +1170,45 @@ JoinPtr SelectQueryExpressionAnalyzer::makeJoin( return join; } -ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere( +ActionsAndProjectInputsFlagPtr SelectQueryExpressionAnalyzer::appendPrewhere( ExpressionActionsChain & chain, bool only_types) { const auto * select_query = getSelectQuery(); if (!select_query->prewhere()) - return nullptr; + return {}; Names first_action_names; if (!chain.steps.empty()) first_action_names = chain.steps.front()->getRequiredColumns().getNames(); auto & step = chain.lastStep(sourceColumns()); - getRootActions(select_query->prewhere(), only_types, step.actions()); + getRootActions(select_query->prewhere(), only_types, step.actions()->dag); String prewhere_column_name = select_query->prewhere()->getColumnName(); step.addRequiredOutput(prewhere_column_name); - const auto & node = 
step.actions()->findInOutputs(prewhere_column_name); + const auto & node = step.actions()->dag.findInOutputs(prewhere_column_name); auto filter_type = node.result_type; if (!filter_type->canBeUsedInBooleanContext()) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in PREWHERE: {}", filter_type->getName()); - ActionsDAGPtr prewhere_actions; + ActionsAndProjectInputsFlagPtr prewhere_actions; { /// Remove unused source_columns from prewhere actions. - auto tmp_actions_dag = std::make_shared(sourceColumns()); + ActionsDAG tmp_actions_dag(sourceColumns()); getRootActions(select_query->prewhere(), only_types, tmp_actions_dag); /// Constants cannot be removed since they can be used in other parts of the query. /// And if they are not used anywhere, except PREWHERE, they will be removed on the next step. - tmp_actions_dag->removeUnusedActions( + tmp_actions_dag.removeUnusedActions( NameSet{prewhere_column_name}, /* allow_remove_inputs= */ true, /* allow_constant_folding= */ false); - auto required_columns = tmp_actions_dag->getRequiredColumnsNames(); + auto required_columns = tmp_actions_dag.getRequiredColumnsNames(); NameSet required_source_columns(required_columns.begin(), required_columns.end()); required_source_columns.insert(first_action_names.begin(), first_action_names.end()); - auto names = step.actions()->getNames(); + auto names = step.actions()->dag.getNames(); NameSet name_set(names.begin(), names.end()); for (const auto & column : sourceColumns()) @@ -1213,13 +1217,13 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere( Names required_output(name_set.begin(), name_set.end()); prewhere_actions = chain.getLastActions(); - prewhere_actions->removeUnusedActions(required_output); + prewhere_actions->dag.removeUnusedActions(required_output); } { - ActionsDAGPtr actions; + auto actions = std::make_shared(); - auto required_columns = prewhere_actions->getRequiredColumns(); + auto required_columns = prewhere_actions->dag.getRequiredColumns(); NameSet prewhere_input_names; for (const auto & col : required_columns) prewhere_input_names.insert(col.name); @@ -1263,11 +1267,11 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere( } } - actions = std::make_shared(std::move(required_columns)); + actions->dag = ActionsDAG(required_columns); } else { - ColumnsWithTypeAndName columns = prewhere_actions->getResultColumns(); + ColumnsWithTypeAndName columns = prewhere_actions->dag.getResultColumns(); for (const auto & column : sourceColumns()) { @@ -1278,7 +1282,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere( } } - actions = std::make_shared(std::move(columns)); + actions->dag = ActionsDAG(columns); } chain.steps.emplace_back( @@ -1300,12 +1304,12 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(columns_after_join); - getRootActions(select_query->where(), only_types, step.actions()); + getRootActions(select_query->where(), only_types, step.actions()->dag); auto where_column_name = select_query->where()->getColumnName(); step.addRequiredOutput(where_column_name); - const auto & node = step.actions()->findInOutputs(where_column_name); + const auto & node = step.actions()->dag.findInOutputs(where_column_name); auto filter_type = node.result_type; if (!filter_type->canBeUsedInBooleanContext()) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in WHERE: {}", @@ -1332,7 +1336,7 @@ bool 
SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain for (const auto & ast_element : ast->children) { step.addRequiredOutput(ast_element->getColumnName()); - getRootActions(ast_element, only_types, step.actions()); + getRootActions(ast_element, only_types, step.actions()->dag); } } } @@ -1341,7 +1345,7 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain for (const auto & ast : asts) { step.addRequiredOutput(ast->getColumnName()); - getRootActions(ast, only_types, step.actions()); + getRootActions(ast, only_types, step.actions()->dag); } } @@ -1350,7 +1354,7 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain for (auto & child : asts) { auto actions_dag = std::make_shared(columns_after_join); - getRootActions(child, only_types, actions_dag); + getRootActions(child, only_types, *actions_dag); group_by_elements_actions.emplace_back( std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes))); } @@ -1387,7 +1391,7 @@ void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(Expression const ASTFunction & node = typeid_cast(*ast); if (node.arguments) for (auto & argument : node.arguments->children) - getRootActions(argument, only_types, step.actions()); + getRootActions(argument, only_types, step.actions()->dag); } } @@ -1409,7 +1413,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments( // recursively together with (1b) as ASTFunction::window_definition. if (getSelectQuery()->window()) { - getRootActionsNoMakeSet(getSelectQuery()->window(), step.actions()); + getRootActionsNoMakeSet(getSelectQuery()->window(), step.actions()->dag); } for (const auto & [_, w] : window_descriptions) @@ -1420,7 +1424,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments( // definitions (1a). // Requiring a constant reference to a shared pointer to non-const AST // doesn't really look sane, but the visitor does indeed require it. - getRootActionsNoMakeSet(f.function_node->clone(), step.actions()); + getRootActionsNoMakeSet(f.function_node->clone(), step.actions()->dag); // (2b) Required function argument columns. 
for (const auto & a : f.function_node->arguments->children) @@ -1442,17 +1446,17 @@ void SelectQueryExpressionAnalyzer::appendExpressionsAfterWindowFunctions(Expres ExpressionActionsChain::Step & step = chain.lastStep(columns_after_window); for (const auto & expression : syntax->expressions_with_window_function) - getRootActionsForWindowFunctions(expression->clone(), true, step.actions()); + getRootActionsForWindowFunctions(expression->clone(), true, step.actions()->dag); } -void SelectQueryExpressionAnalyzer::appendGroupByModifiers(ActionsDAGPtr & before_aggregation, ExpressionActionsChain & chain, bool /* only_types */) +void SelectQueryExpressionAnalyzer::appendGroupByModifiers(ActionsDAG & before_aggregation, ExpressionActionsChain & chain, bool /* only_types */) { const auto * select_query = getAggregatingQuery(); if (!select_query->groupBy() || !(select_query->group_by_with_rollup || select_query->group_by_with_cube)) return; - auto source_columns = before_aggregation->getResultColumns(); + auto source_columns = before_aggregation.getResultColumns(); ColumnsWithTypeAndName result_columns; for (const auto & source_column : source_columns) @@ -1462,9 +1466,11 @@ void SelectQueryExpressionAnalyzer::appendGroupByModifiers(ActionsDAGPtr & befor else result_columns.push_back(source_column); } - ExpressionActionsChain::Step & step = chain.lastStep(before_aggregation->getNamesAndTypesList()); + auto required_output = chain.getLastStep().required_output; + ExpressionActionsChain::Step & step = chain.addStep(before_aggregation.getNamesAndTypesList()); + step.required_output = std::move(required_output); - step.actions() = ActionsDAG::makeConvertingActions(source_columns, result_columns, ActionsDAG::MatchColumnsMode::Position); + step.actions()->dag = std::move(*ActionsDAG::makeConvertingActions(source_columns, result_columns, ActionsDAG::MatchColumnsMode::Position)); } void SelectQueryExpressionAnalyzer::appendSelectSkipWindowExpressions(ExpressionActionsChain::Step & step, ASTPtr const & node) @@ -1495,7 +1501,7 @@ bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns); - getRootActionsForHaving(select_query->having(), only_types, step.actions()); + getRootActionsForHaving(select_query->having(), only_types, step.actions()->dag); step.addRequiredOutput(select_query->having()->getColumnName()); @@ -1508,13 +1514,13 @@ void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns); - getRootActions(select_query->select(), only_types, step.actions()); + getRootActions(select_query->select(), only_types, step.actions()->dag); for (const auto & child : select_query->select()->children) appendSelectSkipWindowExpressions(step, child); } -ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, +ActionsAndProjectInputsFlagPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, ManyExpressionActions & order_by_elements_actions) { const auto * select_query = getSelectQuery(); @@ -1538,7 +1544,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai replaceForPositionalArguments(ast->children.at(0), select_query, ASTSelectQuery::Expression::ORDER_BY); } - getRootActions(select_query->orderBy(), only_types, step.actions()); + 
getRootActions(select_query->orderBy(), only_types, step.actions()->dag); bool with_fill = false; @@ -1601,7 +1607,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai for (const auto & child : select_query->orderBy()->children) { auto actions_dag = std::make_shared(columns_after_join); - getRootActions(child, only_types, actions_dag); + getRootActions(child, only_types, *actions_dag); order_by_elements_actions.emplace_back( std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes))); } @@ -1628,7 +1634,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns); - getRootActions(select_query->limitBy(), only_types, step.actions()); + getRootActions(select_query->limitBy(), only_types, step.actions()->dag); NameSet existing_column_names; for (const auto & column : aggregated_columns) @@ -1657,7 +1663,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain return true; } -ActionsDAGPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const +ActionsAndProjectInputsFlagPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const { const auto * select_query = getSelectQuery(); @@ -1705,17 +1711,20 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActio } } - auto actions = chain.getLastActions(); - actions->project(result_columns); + auto * last_step = chain.getLastExpressionStep(); + auto & actions = last_step->actions_and_flags; + actions->dag.project(result_columns); if (!required_result_columns.empty()) { result_columns.clear(); for (const auto & column : required_result_columns) result_columns.emplace_back(column, std::string{}); - actions->project(result_columns); + actions->dag.project(result_columns); } + actions->project_input = true; + last_step->is_final_projection = true; return actions; } @@ -1723,14 +1732,13 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActio void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types) { ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns()); - getRootActions(expr, only_types, step.actions()); + getRootActions(expr, only_types, step.actions()->dag); step.addRequiredOutput(expr->getColumnName()); } - -ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_result) +ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool remove_unused_result) { - auto actions_dag = std::make_shared(aggregated_columns); + ActionsDAG actions_dag(aggregated_columns); NamesWithAliases result_columns; Names result_names; @@ -1756,13 +1764,15 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_r if (add_aliases) { - if (project_result) - actions_dag->project(result_columns); + if (remove_unused_result) + { + actions_dag.project(result_columns); + } else - actions_dag->addAliases(result_columns); + actions_dag.addAliases(result_columns); } - if (!(add_aliases && project_result)) + if (!(add_aliases && remove_unused_result)) { NameSet name_set(result_names.begin(), result_names.end()); /// We will not delete the original columns. 
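// Hedged restatement of the add_aliases / remove_unused_result branches of
// getActionsDAG above, as a standalone helper (hypothetical name, for illustration):
void finalizeOutputs(ActionsDAG & dag, const NamesWithAliases & result_columns,
                     const NameSet & name_set, bool add_aliases, bool remove_unused_result)
{
    if (add_aliases && remove_unused_result)
        dag.project(result_columns);       /// only the aliased results survive
    else if (add_aliases)
        dag.addAliases(result_columns);    /// aliases added, source columns kept
    if (!(add_aliases && remove_unused_result))
        dag.removeUnusedActions(name_set); /// drop temporaries, keep the named columns
}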
@@ -1775,22 +1785,22 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_r } } - actions_dag->removeUnusedActions(name_set); + actions_dag.removeUnusedActions(name_set); } - return actions_dag; + return std::make_unique(std::move(actions_dag)); } -ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool project_result, CompileExpressions compile_expressions) +ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool remove_unused_result, CompileExpressions compile_expressions) { return std::make_shared( - getActionsDAG(add_aliases, project_result), ExpressionActionsSettings::fromContext(getContext(), compile_expressions)); + getActionsDAG(add_aliases, remove_unused_result), ExpressionActionsSettings::fromContext(getContext(), compile_expressions), add_aliases && remove_unused_result); } ActionsDAGPtr ExpressionAnalyzer::getConstActionsDAG(const ColumnsWithTypeAndName & constant_inputs) { auto actions = std::make_shared(constant_inputs); - getRootActions(query, true /* no_makeset_for_subqueries */, actions, true /* only_consts */); + getRootActions(query, true /* no_makeset_for_subqueries */, *actions, true /* only_consts */); return actions; } @@ -1805,7 +1815,7 @@ std::unique_ptr SelectQueryExpressionAnalyzer::getJoinedPlan() return std::move(joined_plan); } -ActionsDAGPtr SelectQueryExpressionAnalyzer::simpleSelectActions() +ActionsAndProjectInputsFlagPtr SelectQueryExpressionAnalyzer::simpleSelectActions() { ExpressionActionsChain new_chain(getContext()); appendSelect(new_chain, false); @@ -1845,14 +1855,16 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( ssize_t where_step_num = -1; ssize_t having_step_num = -1; + ActionsAndProjectInputsFlagPtr prewhere_dag_and_flags; + auto finalize_chain = [&](ExpressionActionsChain & chain) -> ColumnsWithTypeAndName { if (prewhere_step_num >= 0) { ExpressionActionsChain::Step & step = *chain.steps.at(prewhere_step_num); - auto required_columns_ = prewhere_info->prewhere_actions->getRequiredColumnsNames(); - NameSet required_source_columns(required_columns_.begin(), required_columns_.end()); + auto prewhere_required_columns = prewhere_dag_and_flags->dag.getRequiredColumnsNames(); + NameSet required_source_columns(prewhere_required_columns.begin(), prewhere_required_columns.end()); /// Add required columns to required output in order not to remove them after prewhere execution. /// TODO: add sampling and final execution to common chain. for (const auto & column : additional_required_columns_after_prewhere) @@ -1864,6 +1876,13 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( chain.finalize(); + if (prewhere_dag_and_flags) + { + auto dag = std::make_shared(std::move(prewhere_dag_and_flags->dag)); + prewhere_info = std::make_shared(std::move(dag), query.prewhere()->getColumnName()); + prewhere_dag_and_flags.reset(); + } + finalize(chain, prewhere_step_num, where_step_num, having_step_num, query); auto res = chain.getLastStep().getResultColumns(); @@ -1914,19 +1933,19 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( filter_info->do_remove_column = true; } - if (auto actions = query_analyzer.appendPrewhere(chain, !first_stage)) + if (prewhere_dag_and_flags = query_analyzer.appendPrewhere(chain, !first_stage); prewhere_dag_and_flags) { /// Prewhere is always the first one. 
prewhere_step_num = 0; - prewhere_info = std::make_shared(actions, query.prewhere()->getColumnName()); - if (allowEarlyConstantFolding(*prewhere_info->prewhere_actions, settings)) + if (allowEarlyConstantFolding(prewhere_dag_and_flags->dag, settings)) { Block before_prewhere_sample = source_header; if (sanitizeBlock(before_prewhere_sample)) { + auto dag = prewhere_dag_and_flags->dag.clone(); ExpressionActions( - prewhere_info->prewhere_actions, + dag, ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_prewhere_sample); auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName()); /// If the filter column is a constant, record it. @@ -1950,7 +1969,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( { where_step_num = chain.steps.size() - 1; before_where = chain.getLastActions(); - if (allowEarlyConstantFolding(*before_where, settings)) + if (allowEarlyConstantFolding(before_where->dag, settings)) { Block before_where_sample; if (chain.steps.size() > 1) @@ -1960,7 +1979,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (sanitizeBlock(before_where_sample)) { ExpressionActions( - before_where, + before_where->dag.clone(), ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); auto & column_elem @@ -1986,7 +2005,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( before_aggregation = chain.getLastActions(); if (settings.group_by_use_nulls) - query_analyzer.appendGroupByModifiers(before_aggregation, chain, only_types); + query_analyzer.appendGroupByModifiers(before_aggregation->dag, chain, only_types); auto columns_before_aggregation = finalize_chain(chain); @@ -2033,8 +2052,8 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( true); auto & step = chain.lastStep(query_analyzer.aggregated_columns); - auto & actions = step.actions(); - actions = ActionsDAG::merge(std::move(*actions), std::move(*converting)); + auto & actions = step.actions()->dag; + actions = std::move(*ActionsDAG::merge(std::move(actions), std::move(*converting))); } } @@ -2070,13 +2089,13 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( // the main SELECT, similar to what we do for aggregate functions. if (has_window) { - query_analyzer.makeWindowDescriptions(chain.getLastActions()); + query_analyzer.makeWindowDescriptions(chain.getLastActions()->dag); query_analyzer.appendWindowFunctionsArguments(chain, only_types || !first_stage); // Build a list of output columns of the window step. // 1) We need the columns that are the output of ExpressionActions. 
- for (const auto & x : chain.getLastActions()->getNamesAndTypesList()) + for (const auto & x : chain.getLastActions()->dag.getNamesAndTypesList()) { query_analyzer.columns_after_window.push_back(x); } @@ -2113,7 +2132,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( finalize_chain(chain); query_analyzer.appendExpressionsAfterWindowFunctions(chain, only_types || !first_stage); - for (const auto & x : chain.getLastActions()->getNamesAndTypesList()) + for (const auto & x : chain.getLastActions()->dag.getNamesAndTypesList()) { query_analyzer.columns_after_window.push_back(x); } @@ -2173,7 +2192,6 @@ void ExpressionAnalysisResult::finalize( if (prewhere_step_num >= 0) { const ExpressionActionsChain::Step & step = *chain.steps.at(prewhere_step_num); - prewhere_info->prewhere_actions->projectInput(false); NameSet columns_to_remove; for (const auto & [name, can_remove] : step.required_output) @@ -2206,9 +2224,9 @@ void ExpressionAnalysisResult::finalize( void ExpressionAnalysisResult::removeExtraColumns() const { if (hasWhere()) - before_where->projectInput(); + before_where->project_input = true; if (hasHaving()) - before_having->projectInput(); + before_having->project_input = true; } void ExpressionAnalysisResult::checkActions() const @@ -2238,7 +2256,7 @@ std::string ExpressionAnalysisResult::dump() const if (before_array_join) { - ss << "before_array_join " << before_array_join->dumpDAG() << "\n"; + ss << "before_array_join " << before_array_join->dag.dumpDAG() << "\n"; } if (array_join) @@ -2248,12 +2266,12 @@ std::string ExpressionAnalysisResult::dump() const if (before_join) { - ss << "before_join " << before_join->dumpDAG() << "\n"; + ss << "before_join " << before_join->dag.dumpDAG() << "\n"; } if (before_where) { - ss << "before_where " << before_where->dumpDAG() << "\n"; + ss << "before_where " << before_where->dag.dumpDAG() << "\n"; } if (prewhere_info) @@ -2268,32 +2286,32 @@ std::string ExpressionAnalysisResult::dump() const if (before_aggregation) { - ss << "before_aggregation " << before_aggregation->dumpDAG() << "\n"; + ss << "before_aggregation " << before_aggregation->dag.dumpDAG() << "\n"; } if (before_having) { - ss << "before_having " << before_having->dumpDAG() << "\n"; + ss << "before_having " << before_having->dag.dumpDAG() << "\n"; } if (before_window) { - ss << "before_window " << before_window->dumpDAG() << "\n"; + ss << "before_window " << before_window->dag.dumpDAG() << "\n"; } if (before_order_by) { - ss << "before_order_by " << before_order_by->dumpDAG() << "\n"; + ss << "before_order_by " << before_order_by->dag.dumpDAG() << "\n"; } if (before_limit_by) { - ss << "before_limit_by " << before_limit_by->dumpDAG() << "\n"; + ss << "before_limit_by " << before_limit_by->dag.dumpDAG() << "\n"; } if (final_projection) { - ss << "final_projection " << final_projection->dumpDAG() << "\n"; + ss << "final_projection " << final_projection->dag.dumpDAG() << "\n"; } if (!selected_columns.empty()) diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 941194e69ff..12d6dce8f72 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -115,10 +115,10 @@ public: /// If `ast` is not a SELECT query, just gets all the actions to evaluate the expression. /// If add_aliases, only the calculated values in the desired order and add aliases. - /// If also project_result, than only aliases remain in the output block. + /// If also remove_unused_result, then only aliases remain in the output block.
/// Otherwise, only temporary columns will be deleted from the block. - ActionsDAGPtr getActionsDAG(bool add_aliases, bool project_result = true); - ExpressionActionsPtr getActions(bool add_aliases, bool project_result = true, CompileExpressions compile_expressions = CompileExpressions::no); + ActionsDAGPtr getActionsDAG(bool add_aliases, bool remove_unused_result = true); + ExpressionActionsPtr getActions(bool add_aliases, bool remove_unused_result = true, CompileExpressions compile_expressions = CompileExpressions::no); /// Get actions to evaluate a constant expression. The function adds constants and applies functions that depend only on constants. /// Does not execute subqueries. @@ -139,7 +139,7 @@ public: const WindowDescriptions & windowDescriptions() const { return window_descriptions; } void makeWindowDescriptionFromAST(const Context & context, const WindowDescriptions & existing_descriptions, WindowDescription & desc, const IAST * ast); - void makeWindowDescriptions(ActionsDAGPtr actions); + void makeWindowDescriptions(ActionsDAG & actions); /** Checks if subquery is not a plain StorageSet. * Because while making set we will read data from StorageSet which is not allowed. @@ -172,34 +172,34 @@ protected: /// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables. void initGlobalSubqueriesAndExternalTables(bool do_global, bool is_explain); - ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool is_left) const; + ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAG & actions, bool is_left) const; - void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false); + void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAG & actions, bool only_consts = false); /** Similar to getRootActions but do not make sets when analyzing IN functions. It's used in * analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. If we did, the * prepared sets would not be applicable for MergeTree index optimization. */ - void getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGPtr & actions, bool only_consts = false); + void getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAG & actions, bool only_consts = false); - void getRootActionsForHaving(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false); + void getRootActionsForHaving(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAG & actions, bool only_consts = false); - void getRootActionsForWindowFunctions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions); + void getRootActionsForWindowFunctions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAG & actions); /** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions, * Create a set of columns aggregated_columns resulting after the aggregation, if any, * or after all the actions that are normally performed before aggregation. * Set has_aggregation = true if there is GROUP BY or at least one aggregate function. 
*/ - void analyzeAggregation(ActionsDAGPtr & temp_actions); - void makeAggregateDescriptions(ActionsDAGPtr & actions, AggregateDescriptions & descriptions); + void analyzeAggregation(ActionsDAG & temp_actions); + void makeAggregateDescriptions(ActionsDAG & actions, AggregateDescriptions & descriptions); const ASTSelectQuery * getSelectQuery() const; bool isRemoteStorage() const; - NamesAndTypesList getColumnsAfterArrayJoin(ActionsDAGPtr & actions, const NamesAndTypesList & src_columns); - NamesAndTypesList analyzeJoin(ActionsDAGPtr & actions, const NamesAndTypesList & src_columns); + NamesAndTypesList getColumnsAfterArrayJoin(ActionsDAG & actions, const NamesAndTypesList & src_columns); + NamesAndTypesList analyzeJoin(ActionsDAG & actions, const NamesAndTypesList & src_columns); AggregationKeysInfo getAggregationKeysInfo() const noexcept { @@ -231,20 +231,20 @@ struct ExpressionAnalysisResult bool use_grouping_set_key = false; - ActionsDAGPtr before_array_join; + ActionsAndProjectInputsFlagPtr before_array_join; ArrayJoinActionPtr array_join; - ActionsDAGPtr before_join; - ActionsDAGPtr converting_join_columns; + ActionsAndProjectInputsFlagPtr before_join; + ActionsAndProjectInputsFlagPtr converting_join_columns; JoinPtr join; - ActionsDAGPtr before_where; - ActionsDAGPtr before_aggregation; - ActionsDAGPtr before_having; + ActionsAndProjectInputsFlagPtr before_where; + ActionsAndProjectInputsFlagPtr before_aggregation; + ActionsAndProjectInputsFlagPtr before_having; String having_column_name; bool remove_having_filter = false; - ActionsDAGPtr before_window; - ActionsDAGPtr before_order_by; - ActionsDAGPtr before_limit_by; - ActionsDAGPtr final_projection; + ActionsAndProjectInputsFlagPtr before_window; + ActionsAndProjectInputsFlagPtr before_order_by; + ActionsAndProjectInputsFlagPtr before_limit_by; + ActionsAndProjectInputsFlagPtr final_projection; /// Columns from the SELECT list, before renaming them to aliases. Used to /// perform SELECT DISTINCT. @@ -351,12 +351,12 @@ public: /// Tables that will need to be sent to remote servers for distributed query processing. const TemporaryTablesMapping & getExternalTables() const { return external_tables; } - ActionsDAGPtr simpleSelectActions(); + ActionsAndProjectInputsFlagPtr simpleSelectActions(); /// These appends are public only for tests void appendSelect(ExpressionActionsChain & chain, bool only_types); /// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases. - ActionsDAGPtr appendProjectResult(ExpressionActionsChain & chain) const; + ActionsAndProjectInputsFlagPtr appendProjectResult(ExpressionActionsChain & chain) const; private: StorageMetadataPtr metadata_snapshot; @@ -386,13 +386,13 @@ private: */ /// Before aggregation: - ArrayJoinActionPtr appendArrayJoin(ExpressionActionsChain & chain, ActionsDAGPtr & before_array_join, bool only_types); + ArrayJoinActionPtr appendArrayJoin(ExpressionActionsChain & chain, ActionsAndProjectInputsFlagPtr & before_array_join, bool only_types); bool appendJoinLeftKeys(ExpressionActionsChain & chain, bool only_types); - JoinPtr appendJoin(ExpressionActionsChain & chain, ActionsDAGPtr & converting_join_columns); + JoinPtr appendJoin(ExpressionActionsChain & chain, ActionsAndProjectInputsFlagPtr & converting_join_columns); /// remove_filter is set in ExpressionActionsChain::finalize(); /// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier). 
- ActionsDAGPtr appendPrewhere(ExpressionActionsChain & chain, bool only_types); + ActionsAndProjectInputsFlagPtr appendPrewhere(ExpressionActionsChain & chain, bool only_types); bool appendWhere(ExpressionActionsChain & chain, bool only_types); bool appendGroupBy(ExpressionActionsChain & chain, bool only_types, bool optimize_aggregation_in_order, ManyExpressionActions &); void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types); @@ -401,12 +401,12 @@ private: void appendExpressionsAfterWindowFunctions(ExpressionActionsChain & chain, bool only_types); void appendSelectSkipWindowExpressions(ExpressionActionsChain::Step & step, ASTPtr const & node); - void appendGroupByModifiers(ActionsDAGPtr & before_aggregation, ExpressionActionsChain & chain, bool only_types); + void appendGroupByModifiers(ActionsDAG & before_aggregation, ExpressionActionsChain & chain, bool only_types); /// After aggregation: bool appendHaving(ExpressionActionsChain & chain, bool only_types); /// appendSelect - ActionsDAGPtr appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, ManyExpressionActions &); + ActionsAndProjectInputsFlagPtr appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, ManyExpressionActions &); bool appendLimitBy(ExpressionActionsChain & chain, bool only_types); /// appendProjectResult }; diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index a78f6cc39ef..a990eb651ce 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -88,6 +88,11 @@ #include #include +namespace CurrentMetrics +{ + extern const Metric AttachedTable; +} + namespace DB { @@ -113,6 +118,8 @@ namespace ErrorCodes extern const int UNKNOWN_STORAGE; extern const int SYNTAX_ERROR; extern const int SUPPORT_IS_DISABLED; + extern const int TOO_MANY_TABLES; + extern const int TOO_MANY_DATABASES; } namespace fs = std::filesystem; @@ -138,6 +145,31 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) throw Exception(ErrorCodes::DATABASE_ALREADY_EXISTS, "Database {} already exists.", database_name); } + auto db_num_limit = getContext()->getGlobalContext()->getServerSettings().max_database_num_to_throw; + if (db_num_limit > 0) + { + size_t db_count = DatabaseCatalog::instance().getDatabases().size(); + std::vector system_databases = { + DatabaseCatalog::TEMPORARY_DATABASE, + DatabaseCatalog::SYSTEM_DATABASE, + DatabaseCatalog::INFORMATION_SCHEMA, + DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE, + DatabaseCatalog::DEFAULT_DATABASE + }; + + for (const auto & system_database : system_databases) + { + if (db_count > 0 && DatabaseCatalog::instance().isDatabaseExist(system_database)) + db_count--; + } + + if (db_count >= db_num_limit) + throw Exception(ErrorCodes::TOO_MANY_DATABASES, + "Too many databases in ClickHouse. " + "The limit (setting 'max_database_num_to_throw') is set to {}, current number of databases is {}", + db_num_limit, db_count); + } + /// Will write file with database metadata, if needed.
String database_name_escaped = escapeForFileName(database_name); fs::path metadata_path = fs::weakly_canonical(getContext()->getPath()); @@ -866,6 +898,8 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti assert(as_database_saved.empty() && as_table_saved.empty()); std::swap(create.as_database, as_database_saved); std::swap(create.as_table, as_table_saved); + if (!as_table_saved.empty()) + create.is_create_empty = false; return properties; } @@ -1077,6 +1111,27 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data } +namespace +{ + +void addTableDependencies(const ASTCreateQuery & create, const ASTPtr & query_ptr, const ContextPtr & context) +{ + QualifiedTableName qualified_name{create.getDatabase(), create.getTable()}; + auto ref_dependencies = getDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr, context->getCurrentDatabase()); + auto loading_dependencies = getLoadingDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr); + DatabaseCatalog::instance().addDependencies(qualified_name, ref_dependencies, loading_dependencies); +} + +void checkTableCanBeAddedWithNoCyclicDependencies(const ASTCreateQuery & create, const ASTPtr & query_ptr, const ContextPtr & context) +{ + QualifiedTableName qualified_name{create.getDatabase(), create.getTable()}; + auto ref_dependencies = getDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr, context->getCurrentDatabase()); + auto loading_dependencies = getLoadingDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr); + DatabaseCatalog::instance().checkTableCanBeAddedWithNoCyclicDependencies(qualified_name, ref_dependencies, loading_dependencies); +} + +} + BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) { /// Temporary tables are created out of databases. @@ -1322,11 +1377,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) return {}; /// If table has dependencies - add them to the graph - QualifiedTableName qualified_name{database_name, create.getTable()}; - auto ref_dependencies = getDependenciesFromCreateQuery(getContext()->getGlobalContext(), qualified_name, query_ptr); - auto loading_dependencies = getLoadingDependenciesFromCreateQuery(getContext()->getGlobalContext(), qualified_name, query_ptr); - DatabaseCatalog::instance().addDependencies(qualified_name, ref_dependencies, loading_dependencies); - + addTableDependencies(create, query_ptr, getContext()); return fillTableIfNeeded(create); } @@ -1478,6 +1529,9 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find UUID mapping for {}, it's a bug", create.uuid); } + /// Before actually creating the table, check if it will lead to cyclic dependencies. 
+ checkTableCanBeAddedWithNoCyclicDependencies(create, query_ptr, getContext()); + StoragePtr res; /// NOTE: CREATE query may be rewritten by Storage creator or table function if (create.as_table_function) @@ -1543,6 +1597,17 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, } } + UInt64 table_num_limit = getContext()->getGlobalContext()->getServerSettings().max_table_num_to_throw; + if (table_num_limit > 0 && create.getDatabase() != DatabaseCatalog::SYSTEM_DATABASE) + { + UInt64 table_count = CurrentMetrics::get(CurrentMetrics::AttachedTable); + if (table_count >= table_num_limit) + throw Exception(ErrorCodes::TOO_MANY_TABLES, + "Too many tables in the Clickhouse. " + "The limit (setting 'max_table_num_to_throw') is set to {}, current number of tables is {}", + table_num_limit, table_count); + } + database->createTable(getContext(), create.getTable(), res, query_ptr); /// Move table data to the proper place. Wo do not move data earlier to avoid situations @@ -1578,6 +1643,9 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create, ContextMutablePtr create_context = Context::createCopy(current_context); create_context->setQueryContext(std::const_pointer_cast(current_context)); + /// Before actually creating/replacing the table, check if it will lead to cyclic dependencies. + checkTableCanBeAddedWithNoCyclicDependencies(create, query_ptr, create_context); + auto make_drop_context = [&]() -> ContextMutablePtr { ContextMutablePtr drop_context = Context::createCopy(current_context); @@ -1624,6 +1692,9 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create, assert(done); created = true; + /// If table has dependencies - add them to the graph + addTableDependencies(create, query_ptr, getContext()); + /// Try fill temporary table BlockIO fill_io = fillTableIfNeeded(create); executeTrivialBlockIO(fill_io, getContext()); diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 927bafe4bfb..f396db70d21 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -26,7 +26,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -625,9 +626,20 @@ BlockIO InterpreterInsertQuery::execute() { bool table_prefers_large_blocks = table->prefersLargeBlocks(); + size_t threads = presink_chains.size(); + + pipeline.resize(1); + + pipeline.addTransform(std::make_shared( + header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); + + pipeline.resize(threads); + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { - return std::make_shared( + return std::make_shared( in_header, table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); @@ -683,12 +695,19 @@ BlockIO InterpreterInsertQuery::execute() { bool table_prefers_large_blocks = table->prefersLargeBlocks(); - auto squashing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + auto squashing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? 
settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); chain.addSource(std::move(squashing)); + + auto balancing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + + chain.addSource(std::move(balancing)); } auto context_ptr = getContext(); diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp index eeb762b4d7e..32c475d138f 100644 --- a/src/Interpreters/InterpreterRenameQuery.cpp +++ b/src/Interpreters/InterpreterRenameQuery.cpp @@ -127,14 +127,23 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c { StorageID from_table_id{elem.from_database_name, elem.from_table_name}; StorageID to_table_id{elem.to_database_name, elem.to_table_name}; - std::vector ref_dependencies; - std::vector loading_dependencies; + std::vector from_ref_dependencies; + std::vector from_loading_dependencies; + std::vector to_ref_dependencies; + std::vector to_loading_dependencies; - if (!exchange_tables) + if (exchange_tables) { + DatabaseCatalog::instance().checkTablesCanBeExchangedWithNoCyclicDependencies(from_table_id, to_table_id); + std::tie(from_ref_dependencies, from_loading_dependencies) = database_catalog.removeDependencies(from_table_id, false, false); + std::tie(to_ref_dependencies, to_loading_dependencies) = database_catalog.removeDependencies(to_table_id, false, false); + } + else + { + DatabaseCatalog::instance().checkTableCanBeRenamedWithNoCyclicDependencies(from_table_id, to_table_id); bool check_ref_deps = getContext()->getSettingsRef().check_referential_table_dependencies; bool check_loading_deps = !check_ref_deps && getContext()->getSettingsRef().check_table_dependencies; - std::tie(ref_dependencies, loading_dependencies) = database_catalog.removeDependencies(from_table_id, check_ref_deps, check_loading_deps); + std::tie(from_ref_dependencies, from_loading_dependencies) = database_catalog.removeDependencies(from_table_id, check_ref_deps, check_loading_deps); } try @@ -147,12 +156,17 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c exchange_tables, rename.dictionary); - DatabaseCatalog::instance().addDependencies(to_table_id, ref_dependencies, loading_dependencies); + DatabaseCatalog::instance().addDependencies(to_table_id, from_ref_dependencies, from_loading_dependencies); + if (!to_ref_dependencies.empty() || !to_loading_dependencies.empty()) + DatabaseCatalog::instance().addDependencies(from_table_id, to_ref_dependencies, to_loading_dependencies); + } catch (...) 
{ /// Restore dependencies if RENAME fails - DatabaseCatalog::instance().addDependencies(from_table_id, ref_dependencies, loading_dependencies); + DatabaseCatalog::instance().addDependencies(from_table_id, from_ref_dependencies, from_loading_dependencies); + if (!to_ref_dependencies.empty() || !to_loading_dependencies.empty()) + DatabaseCatalog::instance().addDependencies(to_table_id, to_ref_dependencies, to_loading_dependencies); throw; } } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 8e072779b53..90c484636ea 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -175,11 +175,10 @@ FilterDAGInfoPtr generateFilterActions( /// Using separate expression analyzer to prevent any possible alias injection auto syntax_result = TreeRewriter(context).analyzeSelect(query_ast, TreeRewriterResult({}, storage, storage_snapshot)); SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, context, metadata_snapshot, {}, false, {}, prepared_sets); - filter_info->actions = analyzer.simpleSelectActions(); + filter_info->actions = std::make_unique(std::move(analyzer.simpleSelectActions()->dag)); filter_info->column_name = expr_list->children.at(0)->getColumnName(); filter_info->actions->removeUnusedActions(NameSet{filter_info->column_name}); - filter_info->actions->projectInput(false); for (const auto * node : filter_info->actions->getInputs()) filter_info->actions->getOutputs().push_back(node); @@ -911,7 +910,7 @@ bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis() UInt64 max_rows = maxBlockSizeByLimit(); if (settings.max_rows_to_read) max_rows = max_rows ? std::min(max_rows, settings.max_rows_to_read.value) : settings.max_rows_to_read; - query_info_copy.limit = max_rows; + query_info_copy.trivial_limit = max_rows; /// Apply filters to prewhere and add them to the query_info so we can filter out parts efficiently during row estimation applyFiltersToPrewhereInAnalysis(analysis_copy); @@ -1078,15 +1077,15 @@ Block InterpreterSelectQuery::getSampleBlockImpl() // with this code. See // https://github.com/ClickHouse/ClickHouse/issues/19857 for details. if (analysis_result.before_window) - return analysis_result.before_window->getResultColumns(); + return analysis_result.before_window->dag.getResultColumns(); // NOTE: should not handle before_limit_by specially since // WithMergeableState does not process LIMIT BY - return analysis_result.before_order_by->getResultColumns(); + return analysis_result.before_order_by->dag.getResultColumns(); } - Block header = analysis_result.before_aggregation->getResultColumns(); + Block header = analysis_result.before_aggregation->dag.getResultColumns(); Block res; @@ -1124,18 +1123,18 @@ Block InterpreterSelectQuery::getSampleBlockImpl() // It's different from selected_columns, see the comment above for // WithMergeableState stage. if (analysis_result.before_window) - return analysis_result.before_window->getResultColumns(); + return analysis_result.before_window->dag.getResultColumns(); // In case of query on remote shards executed up to // WithMergeableStateAfterAggregation*, they can process LIMIT BY, // since the initiator will not apply LIMIT BY again. 
if (analysis_result.before_limit_by) - return analysis_result.before_limit_by->getResultColumns(); + return analysis_result.before_limit_by->dag.getResultColumns(); - return analysis_result.before_order_by->getResultColumns(); + return analysis_result.before_order_by->dag.getResultColumns(); } - return analysis_result.final_projection->getResultColumns(); + return analysis_result.final_projection->dag.getResultColumns(); } @@ -1474,6 +1473,9 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

(source_header); @@ -1636,12 +1638,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

(query_plan.getCurrentDataStream(), expressions.before_array_join); - before_array_join_step->setStepDescription("Before ARRAY JOIN"); - query_plan.addStep(std::move(before_array_join_step)); - } + executeExpression(query_plan, expressions.before_array_join, "Before ARRAY JOIN"); if (expressions.array_join) { @@ -1653,23 +1650,11 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( - query_plan.getCurrentDataStream(), - expressions.before_join); - before_join_step->setStepDescription("Before JOIN"); - query_plan.addStep(std::move(before_join_step)); - } + executeExpression(query_plan, expressions.before_join, "Before JOIN"); /// Optional step to convert key columns to common supertype. if (expressions.converting_join_columns) - { - QueryPlanStepPtr convert_join_step = std::make_unique( - query_plan.getCurrentDataStream(), - expressions.converting_join_columns); - convert_join_step->setStepDescription("Convert JOIN columns"); - query_plan.addStep(std::move(convert_join_step)); - } + executeExpression(query_plan, expressions.converting_join_columns, "Convert JOIN columns"); if (expressions.hasJoin()) { @@ -2113,7 +2098,6 @@ void InterpreterSelectQuery::applyFiltersToPrewhereInAnalysis(ExpressionAnalysis { /// Execute row level filter in prewhere as a part of "move to prewhere" optimization. analysis.prewhere_info = std::make_shared(analysis.filter_info->actions, analysis.filter_info->column_name); - analysis.prewhere_info->prewhere_actions->projectInput(false); analysis.prewhere_info->remove_prewhere_column = analysis.filter_info->do_remove_column; analysis.prewhere_info->need_filter = true; analysis.filter_info = nullptr; @@ -2124,7 +2108,6 @@ void InterpreterSelectQuery::applyFiltersToPrewhereInAnalysis(ExpressionAnalysis /// Add row level security actions to prewhere. analysis.prewhere_info->row_level_filter = analysis.filter_info->actions; analysis.prewhere_info->row_level_column_name = analysis.filter_info->column_name; - analysis.prewhere_info->row_level_filter->projectInput(false); analysis.filter_info = nullptr; } } @@ -2333,7 +2316,7 @@ std::optional InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle } if (analysis_result.hasWhere()) { - filter_nodes.push_back(&analysis_result.before_where->findInOutputs(analysis_result.where_column_name)); + filter_nodes.push_back(&analysis_result.before_where->dag.findInOutputs(analysis_result.where_column_name)); } auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes); @@ -2399,7 +2382,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc auto column = ColumnAggregateFunction::create(func); column->insertFrom(place); - Block header = analysis_result.before_aggregation->getResultColumns(); + Block header = analysis_result.before_aggregation->dag.getResultColumns(); size_t arguments_size = desc.argument_names.size(); DataTypes argument_types(arguments_size); for (size_t j = 0; j < arguments_size; ++j) @@ -2462,13 +2445,13 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc if (local_limits.local_limits.size_limits.max_rows != 0) { if (max_block_limited < local_limits.local_limits.size_limits.max_rows) - query_info.limit = max_block_limited; + query_info.trivial_limit = max_block_limited; else if (local_limits.local_limits.size_limits.max_rows < std::numeric_limits::max()) /// Ask to read just enough rows to make the max_rows limit effective (so it has a chance to be triggered). 
- query_info.limit = 1 + local_limits.local_limits.size_limits.max_rows; + query_info.trivial_limit = 1 + local_limits.local_limits.size_limits.max_rows; } else { - query_info.limit = max_block_limited; + query_info.trivial_limit = max_block_limited; } } @@ -2554,6 +2537,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc query_info.storage_limits = std::make_shared(storage_limits); query_info.settings_limit_offset_done = options.settings_limit_offset_done; + storage->read(query_plan, required_columns, storage_snapshot, query_info, context, processing_stage, max_block_size, max_streams); if (context->hasQueryContext() && !options.is_internal) @@ -2595,10 +2579,14 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc } } -void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter) +void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, bool remove_filter) { + auto dag = expression->dag.clone(); + if (expression->project_input) + dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + auto where_step = std::make_unique( - query_plan.getCurrentDataStream(), expression, getSelectQuery().where()->getColumnName(), remove_filter); + query_plan.getCurrentDataStream(), std::move(dag), getSelectQuery().where()->getColumnName(), remove_filter); where_step->setStepDescription("WHERE"); query_plan.addStep(std::move(where_step)); @@ -2672,11 +2660,9 @@ static GroupingSetsParamsList getAggregatorGroupingSetsParams(const SelectQueryE return result; } -void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info) +void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info) { - auto expression_before_aggregation = std::make_unique(query_plan.getCurrentDataStream(), expression); - expression_before_aggregation->setStepDescription("Before GROUP BY"); - query_plan.addStep(std::move(expression_before_aggregation)); + executeExpression(query_plan, expression, "Before GROUP BY"); AggregateDescriptions aggregates = query_analyzer->aggregates(); const Settings & settings = context->getSettingsRef(); @@ -2767,10 +2753,14 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool } -void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter) +void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, bool remove_filter) { + auto dag = expression->dag.clone(); + if (expression->project_input) + dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + auto having_step - = std::make_unique(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(), remove_filter); + = std::make_unique(query_plan.getCurrentDataStream(), std::move(dag), getSelectQuery().having()->getColumnName(), remove_filter); having_step->setStepDescription("HAVING"); query_plan.addStep(std::move(having_step)); @@ -2778,15 +2768,23 @@ void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const Actions void InterpreterSelectQuery::executeTotalsAndHaving( - QueryPlan & query_plan, bool has_having, const 
ActionsDAGPtr & expression, bool remove_filter, bool overflow_row, bool final) + QueryPlan & query_plan, bool has_having, const ActionsAndProjectInputsFlagPtr & expression, bool remove_filter, bool overflow_row, bool final) { + ActionsDAGPtr dag; + if (expression) + { + dag = expression->dag.clone(); + if (expression->project_input) + dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + } + const Settings & settings = context->getSettingsRef(); auto totals_having_step = std::make_unique( query_plan.getCurrentDataStream(), query_analyzer->aggregates(), overflow_row, - expression, + std::move(dag), has_having ? getSelectQuery().having()->getColumnName() : "", remove_filter, settings.totals_mode, @@ -2819,12 +2817,16 @@ void InterpreterSelectQuery::executeRollupOrCube(QueryPlan & query_plan, Modific query_plan.addStep(std::move(step)); } -void InterpreterSelectQuery::executeExpression(QueryPlan & query_plan, const ActionsDAGPtr & expression, const std::string & description) +void InterpreterSelectQuery::executeExpression(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, const std::string & description) { if (!expression) return; - auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), expression); + auto dag = expression->dag.clone(); + if (expression->project_input) + dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + + auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(dag)); expression_step->setStepDescription(description); query_plan.addStep(std::move(expression_step)); @@ -2994,11 +2996,9 @@ void InterpreterSelectQuery::executeMergeSorted(QueryPlan & query_plan, const st } -void InterpreterSelectQuery::executeProjection(QueryPlan & query_plan, const ActionsDAGPtr & expression) +void InterpreterSelectQuery::executeProjection(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression) { - auto projection_step = std::make_unique(query_plan.getCurrentDataStream(), expression); - projection_step->setStepDescription("Projection"); - query_plan.addStep(std::move(projection_step)); + executeExpression(query_plan, expression, "Projection"); } diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index e89a1e5febf..d4ed19d45ea 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -174,13 +174,13 @@ private: /// Different stages of query execution. 
void executeFetchColumns(QueryProcessingStage::Enum processing_stage, QueryPlan & query_plan); - void executeWhere(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter); + void executeWhere(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, bool remove_filter); void executeAggregation( - QueryPlan & query_plan, const ActionsDAGPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info); + QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info); void executeMergeAggregated(QueryPlan & query_plan, bool overflow_row, bool final, bool has_grouping_sets); - void executeTotalsAndHaving(QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool remove_filter, bool overflow_row, bool final); - void executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter); - static void executeExpression(QueryPlan & query_plan, const ActionsDAGPtr & expression, const std::string & description); + void executeTotalsAndHaving(QueryPlan & query_plan, bool has_having, const ActionsAndProjectInputsFlagPtr & expression, bool remove_filter, bool overflow_row, bool final); + void executeHaving(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, bool remove_filter); + static void executeExpression(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, const std::string & description); /// FIXME should go through ActionsDAG to behave as a proper function void executeWindow(QueryPlan & query_plan); void executeOrder(QueryPlan & query_plan, InputOrderInfoPtr sorting_info); @@ -191,7 +191,7 @@ private: void executeLimitBy(QueryPlan & query_plan); void executeLimit(QueryPlan & query_plan); void executeOffset(QueryPlan & query_plan); - static void executeProjection(QueryPlan & query_plan, const ActionsDAGPtr & expression); + static void executeProjection(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression); void executeDistinct(QueryPlan & query_plan, bool before_order, Names columns, bool pre_distinct); void executeExtremes(QueryPlan & query_plan); void executeSubqueriesInSetsAndJoins(QueryPlan & query_plan); diff --git a/src/Interpreters/MetricLog.cpp b/src/Interpreters/MetricLog.cpp index 6ed29cfadcb..596b0e4f96c 100644 --- a/src/Interpreters/MetricLog.cpp +++ b/src/Interpreters/MetricLog.cpp @@ -56,78 +56,32 @@ void MetricLogElement::appendToBlock(MutableColumns & columns) const columns[column_idx++]->insert(current_metrics[i].toUnderType()); } - -void MetricLog::startCollectMetric(size_t collect_interval_milliseconds_) +void MetricLog::stepFunction(const std::chrono::system_clock::time_point current_time) { - collect_interval_milliseconds = collect_interval_milliseconds_; - is_shutdown_metric_thread = false; - metric_flush_thread = std::make_unique([this] { metricThreadFunction(); }); -} - - -void MetricLog::stopCollectMetric() -{ - bool old_val = false; - if (!is_shutdown_metric_thread.compare_exchange_strong(old_val, true)) - return; - if (metric_flush_thread) - metric_flush_thread->join(); -} - - -void MetricLog::shutdown() -{ - stopCollectMetric(); - stopFlushThread(); -} - - -void MetricLog::metricThreadFunction() -{ - auto desired_timepoint = std::chrono::system_clock::now(); - + /// Static lazy initialization to avoid polluting the header with implementation details /// For differentiation of ProfileEvents counters. 
- std::vector prev_profile_events(ProfileEvents::end()); + static std::vector prev_profile_events(ProfileEvents::end()); - while (!is_shutdown_metric_thread) + MetricLogElement elem; + elem.event_time = std::chrono::system_clock::to_time_t(current_time); + elem.event_time_microseconds = timeInMicroseconds(current_time); + + elem.profile_events.resize(ProfileEvents::end()); + for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) { - try - { - const auto current_time = std::chrono::system_clock::now(); - - MetricLogElement elem; - elem.event_time = std::chrono::system_clock::to_time_t(current_time); - elem.event_time_microseconds = timeInMicroseconds(current_time); - - elem.profile_events.resize(ProfileEvents::end()); - for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) - { - const ProfileEvents::Count new_value = ProfileEvents::global_counters[i].load(std::memory_order_relaxed); - auto & old_value = prev_profile_events[i]; - elem.profile_events[i] = new_value - old_value; - old_value = new_value; - } - - elem.current_metrics.resize(CurrentMetrics::end()); - for (size_t i = 0, end = CurrentMetrics::end(); i < end; ++i) - { - elem.current_metrics[i] = CurrentMetrics::values[i]; - } - - this->add(std::move(elem)); - - /// We will record current time into table but align it to regular time intervals to avoid time drift. - /// We may drop some time points if the server is overloaded and recording took too much time. - while (desired_timepoint <= current_time) - desired_timepoint += std::chrono::milliseconds(collect_interval_milliseconds); - - std::this_thread::sleep_until(desired_timepoint); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } + const ProfileEvents::Count new_value = ProfileEvents::global_counters[i].load(std::memory_order_relaxed); + auto & old_value = prev_profile_events[i]; + elem.profile_events[i] = new_value - old_value; + old_value = new_value; } + + elem.current_metrics.resize(CurrentMetrics::end()); + for (size_t i = 0, end = CurrentMetrics::end(); i < end; ++i) + { + elem.current_metrics[i] = CurrentMetrics::values[i]; + } + + this->add(std::move(elem)); } } diff --git a/src/Interpreters/MetricLog.h b/src/Interpreters/MetricLog.h index 4f1e8fafc11..a6fd3ecfcd3 100644 --- a/src/Interpreters/MetricLog.h +++ b/src/Interpreters/MetricLog.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -9,7 +10,6 @@ #include #include -#include #include @@ -33,26 +33,12 @@ struct MetricLogElement void appendToBlock(MutableColumns & columns) const; }; - -class MetricLog : public SystemLog +class MetricLog : public PeriodicLog { - using SystemLog::SystemLog; + using PeriodicLog::PeriodicLog; -public: - void shutdown() override; - - /// Launches a background thread to collect metrics with interval - void startCollectMetric(size_t collect_interval_milliseconds_); - - /// Stop background thread. Call before shutdown. 
- void stopCollectMetric(); - -private: - void metricThreadFunction(); - - std::unique_ptr metric_flush_thread; - size_t collect_interval_milliseconds; - std::atomic is_shutdown_metric_thread{false}; +protected: + void stepFunction(TimePoint current_time) override; }; } diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index ba33b70b59c..6d3a4f30b34 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -1137,9 +1137,9 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s for (const auto & kv : stage.column_to_updated) { auto column_name = kv.second->getColumnName(); - const auto & dag_node = actions->findInOutputs(column_name); - const auto & alias = actions->addAlias(dag_node, kv.first); - actions->addOrReplaceInOutputs(alias); + const auto & dag_node = actions->dag.findInOutputs(column_name); + const auto & alias = actions->dag.addAlias(dag_node, kv.first); + actions->dag.addOrReplaceInOutputs(alias); } } @@ -1202,7 +1202,7 @@ void MutationsInterpreter::Source::read( { ActionsDAG::NodeRawConstPtrs nodes(num_filters); for (size_t i = 0; i < num_filters; ++i) - nodes[i] = &steps[i]->actions()->findInOutputs(names[i]); + nodes[i] = &steps[i]->actions()->dag.findInOutputs(names[i]); filter = ActionsDAG::buildFilterActionsDAG(nodes); } @@ -1273,18 +1273,24 @@ QueryPipelineBuilder MutationsInterpreter::addStreamsForLaterStages(const std::v for (size_t i = 0; i < stage.expressions_chain.steps.size(); ++i) { const auto & step = stage.expressions_chain.steps[i]; - if (step->actions()->hasArrayJoin()) + if (step->actions()->dag.hasArrayJoin()) throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "arrayJoin is not allowed in mutations"); if (i < stage.filter_column_names.size()) { + auto dag = step->actions()->dag.clone(); + if (step->actions()->project_input) + dag->appendInputsForUnusedColumns(plan.getCurrentDataStream().header); /// Execute DELETEs. - plan.addStep(std::make_unique(plan.getCurrentDataStream(), step->actions(), stage.filter_column_names[i], false)); + plan.addStep(std::make_unique(plan.getCurrentDataStream(), dag, stage.filter_column_names[i], false)); } else { + auto dag = step->actions()->dag.clone(); + if (step->actions()->project_input) + dag->appendInputsForUnusedColumns(plan.getCurrentDataStream().header); /// Execute UPDATE or final projection. 
- plan.addStep(std::make_unique(plan.getCurrentDataStream(), step->actions())); + plan.addStep(std::make_unique(plan.getCurrentDataStream(), dag)); } } diff --git a/src/Interpreters/S3QueueLog.cpp b/src/Interpreters/ObjectStorageQueueLog.cpp similarity index 86% rename from src/Interpreters/S3QueueLog.cpp rename to src/Interpreters/ObjectStorageQueueLog.cpp index ba990a8ac25..24261429434 100644 --- a/src/Interpreters/S3QueueLog.cpp +++ b/src/Interpreters/ObjectStorageQueueLog.cpp @@ -8,19 +8,19 @@ #include #include #include -#include +#include namespace DB { -ColumnsDescription S3QueueLogElement::getColumnsDescription() +ColumnsDescription ObjectStorageQueueLogElement::getColumnsDescription() { auto status_datatype = std::make_shared( DataTypeEnum8::Values { - {"Processed", static_cast(S3QueueLogElement::S3QueueStatus::Processed)}, - {"Failed", static_cast(S3QueueLogElement::S3QueueStatus::Failed)}, + {"Processed", static_cast(ObjectStorageQueueLogElement::ObjectStorageQueueStatus::Processed)}, + {"Failed", static_cast(ObjectStorageQueueLogElement::ObjectStorageQueueStatus::Failed)}, }); return ColumnsDescription @@ -41,7 +41,7 @@ ColumnsDescription S3QueueLogElement::getColumnsDescription() }; } -void S3QueueLogElement::appendToBlock(MutableColumns & columns) const +void ObjectStorageQueueLogElement::appendToBlock(MutableColumns & columns) const { size_t i = 0; columns[i++]->insert(getFQDNOrHostName()); diff --git a/src/Interpreters/S3QueueLog.h b/src/Interpreters/ObjectStorageQueueLog.h similarity index 69% rename from src/Interpreters/S3QueueLog.h rename to src/Interpreters/ObjectStorageQueueLog.h index 19e69c39247..b0e843a0cc3 100644 --- a/src/Interpreters/S3QueueLog.h +++ b/src/Interpreters/ObjectStorageQueueLog.h @@ -9,7 +9,7 @@ namespace DB { -struct S3QueueLogElement +struct ObjectStorageQueueLogElement { time_t event_time{}; @@ -20,18 +20,18 @@ struct S3QueueLogElement std::string file_name; size_t rows_processed = 0; - enum class S3QueueStatus : uint8_t + enum class ObjectStorageQueueStatus : uint8_t { Processed, Failed, }; - S3QueueStatus status; + ObjectStorageQueueStatus status; ProfileEvents::Counters::Snapshot counters_snapshot; time_t processing_start_time; time_t processing_end_time; std::string exception; - static std::string name() { return "S3QueueLog"; } + static std::string name() { return "ObjectStorageQueueLog"; } static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } @@ -39,9 +39,9 @@ struct S3QueueLogElement void appendToBlock(MutableColumns & columns) const; }; -class S3QueueLog : public SystemLog +class ObjectStorageQueueLog : public SystemLog { - using SystemLog::SystemLog; + using SystemLog::SystemLog; }; } diff --git a/src/Interpreters/PeriodicLog.cpp b/src/Interpreters/PeriodicLog.cpp new file mode 100644 index 00000000000..9d2891e11eb --- /dev/null +++ b/src/Interpreters/PeriodicLog.cpp @@ -0,0 +1,62 @@ +#include +#include +#include + +namespace DB +{ + +template +void PeriodicLog::startCollect(size_t collect_interval_milliseconds_) +{ + collect_interval_milliseconds = collect_interval_milliseconds_; + is_shutdown_metric_thread = false; + flush_thread = std::make_unique([this] { threadFunction(); }); +} + +template +void PeriodicLog::stopCollect() +{ + bool old_val = false; + if (!is_shutdown_metric_thread.compare_exchange_strong(old_val, true)) + return; + if (flush_thread) + flush_thread->join(); +} + +template +void PeriodicLog::shutdown() +{ + stopCollect(); + this->stopFlushThread(); +} + +template 
+void PeriodicLog::threadFunction() +{ + auto desired_timepoint = std::chrono::system_clock::now(); + while (!is_shutdown_metric_thread) + { + try + { + const auto current_time = std::chrono::system_clock::now(); + + stepFunction(current_time); + + /// We will record current time into table but align it to regular time intervals to avoid time drift. + /// We may drop some time points if the server is overloaded and recording took too much time. + while (desired_timepoint <= current_time) + desired_timepoint += std::chrono::milliseconds(collect_interval_milliseconds); + + std::this_thread::sleep_until(desired_timepoint); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } +} + +#define INSTANTIATE_SYSTEM_LOG(ELEMENT) template class PeriodicLog; +SYSTEM_PERIODIC_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG) + +} diff --git a/src/Interpreters/PeriodicLog.h b/src/Interpreters/PeriodicLog.h new file mode 100644 index 00000000000..08c3f7eb23f --- /dev/null +++ b/src/Interpreters/PeriodicLog.h @@ -0,0 +1,43 @@ +#pragma once + +#include +#include + +#include +#include + +#define SYSTEM_PERIODIC_LOG_ELEMENTS(M) \ + M(ErrorLogElement) \ + M(MetricLogElement) + +namespace DB +{ + +template +class PeriodicLog : public SystemLog +{ + using SystemLog::SystemLog; + +public: + using TimePoint = std::chrono::system_clock::time_point; + + /// Launches a background thread to collect metrics with interval + void startCollect(size_t collect_interval_milliseconds_); + + /// Stop background thread + void stopCollect(); + + void shutdown() final; + +protected: + virtual void stepFunction(TimePoint current_time) = 0; + +private: + void threadFunction(); + + std::unique_ptr flush_thread; + size_t collect_interval_milliseconds; + std::atomic is_shutdown_metric_thread{false}; +}; + +} diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index 92f8ddae141..e63a2ae31aa 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -136,6 +136,9 @@ ColumnsDescription QueryLogElement::getColumnsDescription() {"used_row_policies", array_low_cardinality_string, "The list of row policies names that were used during query execution."}, + {"used_privileges", array_low_cardinality_string, "Privileges which were successfully checked during query execution."}, + {"missing_privileges", array_low_cardinality_string, "Privileges that are missing during query execution."}, + {"transaction_id", getTransactionIDDataType(), "The identifier of the transaction in scope of which this query was executed."}, {"query_cache_usage", std::move(query_cache_usage_datatype), "Usage of the query cache during query execution. 
Values: 'Unknown' = Status unknown, 'None' = The query result was neither written into nor read from the query cache, 'Write' = The query result was written into the query cache, 'Read' = The query result was read from the query cache."}, @@ -267,6 +270,8 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const auto & column_storage_factory_objects = typeid_cast(*columns[i++]); auto & column_table_function_factory_objects = typeid_cast(*columns[i++]); auto & column_row_policies_names = typeid_cast(*columns[i++]); + auto & column_used_privileges = typeid_cast(*columns[i++]); + auto & column_missing_privileges = typeid_cast(*columns[i++]); auto fill_column = [](const auto & data, ColumnArray & column) { @@ -290,6 +295,8 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const fill_column(used_storages, column_storage_factory_objects); fill_column(used_table_functions, column_table_function_factory_objects); fill_column(used_row_policies, column_row_policies_names); + fill_column(used_privileges, column_used_privileges); + fill_column(missing_privileges, column_missing_privileges); } columns[i++]->insert(Tuple{tid.start_csn, tid.local_tid, tid.host_id}); diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h index 5072d220160..bbaa7179757 100644 --- a/src/Interpreters/QueryLog.h +++ b/src/Interpreters/QueryLog.h @@ -81,6 +81,8 @@ struct QueryLogElement std::unordered_set used_storages; std::unordered_set used_table_functions; std::set used_row_policies; + std::unordered_set used_privileges; + std::unordered_set missing_privileges; Int32 exception_code{}; // because ErrorCodes are int String exception; diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index 83ff025d2a6..872a9f864df 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -233,29 +233,22 @@ void ServerAsynchronousMetrics::updateImpl(TimePoint update_time, TimePoint curr } #if USE_AWS_S3 - try + if (auto s3_client = disk->tryGetS3StorageClient()) { - if (auto s3_client = disk->getS3StorageClient()) + if (auto put_throttler = s3_client->getPutRequestThrottler()) { - if (auto put_throttler = s3_client->getPutRequestThrottler()) - { - new_values[fmt::format("DiskPutObjectThrottlerRPS_{}", name)] = { put_throttler->getMaxSpeed(), - "PutObject Request throttling limit on the disk in requests per second (virtual filesystem). Local filesystems may not provide this information." }; - new_values[fmt::format("DiskPutObjectThrottlerAvailable_{}", name)] = { put_throttler->getAvailable(), - "Number of PutObject requests that can be currently issued without hitting throttling limit on the disk (virtual filesystem). Local filesystems may not provide this information." }; - } - if (auto get_throttler = s3_client->getGetRequestThrottler()) - { - new_values[fmt::format("DiskGetObjectThrottlerRPS_{}", name)] = { get_throttler->getMaxSpeed(), - "GetObject Request throttling limit on the disk in requests per second (virtual filesystem). Local filesystems may not provide this information." }; - new_values[fmt::format("DiskGetObjectThrottlerAvailable_{}", name)] = { get_throttler->getAvailable(), - "Number of GetObject requests that can be currently issued without hitting throttling limit on the disk (virtual filesystem). Local filesystems may not provide this information." 
}; - } + new_values[fmt::format("DiskPutObjectThrottlerRPS_{}", name)] = { put_throttler->getMaxSpeed(), + "PutObject Request throttling limit on the disk in requests per second (virtual filesystem). Local filesystems may not provide this information." }; + new_values[fmt::format("DiskPutObjectThrottlerAvailable_{}", name)] = { put_throttler->getAvailable(), + "Number of PutObject requests that can be currently issued without hitting throttling limit on the disk (virtual filesystem). Local filesystems may not provide this information." }; + } + if (auto get_throttler = s3_client->getGetRequestThrottler()) + { + new_values[fmt::format("DiskGetObjectThrottlerRPS_{}", name)] = { get_throttler->getMaxSpeed(), + "GetObject Request throttling limit on the disk in requests per second (virtual filesystem). Local filesystems may not provide this information." }; + new_values[fmt::format("DiskGetObjectThrottlerAvailable_{}", name)] = { get_throttler->getAvailable(), + "Number of GetObject requests that can be currently issued without hitting throttling limit on the disk (virtual filesystem). Local filesystems may not provide this information." }; } - } - catch (...) // NOLINT(bugprone-empty-catch) - { - // Skip disk that do not have s3 throttlers } #endif } diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 396562189e0..bb8c415602f 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -532,7 +532,7 @@ ContextMutablePtr Session::makeSessionContext() session_context->checkSettingsConstraints(settings_from_auth_server, SettingSource::QUERY); session_context->applySettingsChanges(settings_from_auth_server); - recordLoginSucess(session_context); + recordLoginSuccess(session_context); return session_context; } @@ -596,7 +596,7 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: { session_name_ }, max_sessions_for_user); - recordLoginSucess(session_context); + recordLoginSuccess(session_context); return session_context; } @@ -672,13 +672,13 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t user = query_context->getUser(); /// Interserver does not create session context - recordLoginSucess(query_context); + recordLoginSuccess(query_context); return query_context; } -void Session::recordLoginSucess(ContextPtr login_context) const +void Session::recordLoginSuccess(ContextPtr login_context) const { if (notified_session_log_about_login) return; @@ -694,7 +694,7 @@ void Session::recordLoginSucess(ContextPtr login_context) const session_log->addLoginSuccess(auth_id, named_session ? 
named_session->key.second : "", settings, - access, + access->getAccess(), getClientInfo(), user); } diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index 14f6f806acd..fc41c78e666 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -102,8 +102,7 @@ public: private: std::shared_ptr getSessionLog() const; ContextMutablePtr makeQueryContextImpl(const ClientInfo * client_info_to_copy, ClientInfo * client_info_to_move) const; - void recordLoginSucess(ContextPtr login_context) const; - + void recordLoginSuccess(ContextPtr login_context) const; mutable bool notified_session_log_about_login = false; const UUID auth_id; diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index adb94cae0c2..0615a2a1d62 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -86,6 +86,7 @@ ColumnsDescription SessionLogElement::getColumnsDescription() AUTH_TYPE_NAME_AND_VALUE(AuthType::SHA256_PASSWORD), AUTH_TYPE_NAME_AND_VALUE(AuthType::DOUBLE_SHA1_PASSWORD), AUTH_TYPE_NAME_AND_VALUE(AuthType::LDAP), + AUTH_TYPE_NAME_AND_VALUE(AuthType::JWT), AUTH_TYPE_NAME_AND_VALUE(AuthType::KERBEROS), AUTH_TYPE_NAME_AND_VALUE(AuthType::SSH_KEY), AUTH_TYPE_NAME_AND_VALUE(AuthType::SSL_CERTIFICATE), @@ -93,7 +94,7 @@ ColumnsDescription SessionLogElement::getColumnsDescription() AUTH_TYPE_NAME_AND_VALUE(AuthType::HTTP), }); #undef AUTH_TYPE_NAME_AND_VALUE - static_assert(static_cast(AuthenticationType::MAX) == 10); + static_assert(static_cast(AuthenticationType::MAX) == 11); auto interface_type_column = std::make_shared( DataTypeEnum8::Values @@ -214,7 +215,7 @@ void SessionLog::addLoginSuccess(const UUID & auth_id, const ClientInfo & client_info, const UserPtr & login_user) { - DB::SessionLogElement log_entry(auth_id, SESSION_LOGIN_SUCCESS); + SessionLogElement log_entry(auth_id, SESSION_LOGIN_SUCCESS); log_entry.client_info = client_info; if (login_user) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp new file mode 100644 index 00000000000..f8b6a6542cc --- /dev/null +++ b/src/Interpreters/Squashing.cpp @@ -0,0 +1,159 @@ +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) + : header(header_) + , min_block_size_rows(min_block_size_rows_) + , min_block_size_bytes(min_block_size_bytes_) +{ +} + +Chunk Squashing::flush() +{ + return convertToChunk(std::move(chunks_to_merge_vec)); +} + +Chunk Squashing::squash(Chunk && input_chunk) +{ + if (!input_chunk.hasChunkInfo()) + return Chunk(); + + const auto *info = getInfoFromChunk(input_chunk); + return squash(info->chunks); +} + +Chunk Squashing::add(Chunk && input_chunk) +{ + if (!input_chunk) + return {}; + + /// Just read block is already enough. + if (isEnoughSize(input_chunk.getNumRows(), input_chunk.bytes())) + { + /// If no accumulated data, return just read block. + if (chunks_to_merge_vec.empty()) + { + chunks_to_merge_vec.push_back(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + return res_chunk; + } + + /// Return accumulated data (maybe it has small size) and place new block to accumulated data. 
+ Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); + return res_chunk; + } + + /// Accumulated block is already enough. + if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) + { + /// Return accumulated data and place new block to accumulated data. + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); + return res_chunk; + } + + /// Pushing data into accumulating vector + expandCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); + + /// If accumulated data is big enough, we send it + if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) + { + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + changeCurrentSize(0, 0); + chunks_to_merge_vec.clear(); + return res_chunk; + } + return {}; +} + +Chunk Squashing::convertToChunk(std::vector && chunks) const +{ + if (chunks.empty()) + return {}; + + auto info = std::make_shared(); + info->chunks = std::move(chunks); + + chunks.clear(); + + return Chunk(header.cloneEmptyColumns(), 0, info); +} + +Chunk Squashing::squash(std::vector & input_chunks) +{ + Chunk accumulated_chunk; + std::vector mutable_columns = {}; + size_t rows = 0; + for (const Chunk & chunk : input_chunks) + rows += chunk.getNumRows(); + + { + auto & first_chunk = input_chunks[0]; + Columns columns = first_chunk.detachColumns(); + for (auto & column : columns) + { + mutable_columns.push_back(IColumn::mutate(std::move(column))); + mutable_columns.back()->reserve(rows); + } + } + + for (size_t i = 1; i < input_chunks.size(); ++i) // We've already processed the first chunk above + { + Columns columns = input_chunks[i].detachColumns(); + for (size_t j = 0, size = mutable_columns.size(); j < size; ++j) + { + const auto source_column = columns[j]; + + mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size()); + } + } + accumulated_chunk.setColumns(std::move(mutable_columns), rows); + return accumulated_chunk; +} + +const ChunksToSquash* Squashing::getInfoFromChunk(const Chunk & chunk) +{ + const auto& info = chunk.getChunkInfo(); + const auto * agg_info = typeid_cast(info.get()); + + if (!agg_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr"); + + return agg_info; +} + +void Squashing::expandCurrentSize(size_t rows, size_t bytes) +{ + accumulated_size.rows += rows; + accumulated_size.bytes += bytes; +} + +void Squashing::changeCurrentSize(size_t rows, size_t bytes) +{ + accumulated_size.rows = rows; + accumulated_size.bytes = bytes; +} + +bool Squashing::isEnoughSize(size_t rows, size_t bytes) const +{ + return (!min_block_size_rows && !min_block_size_bytes) + || (min_block_size_rows && rows >= min_block_size_rows) + || (min_block_size_bytes && bytes >= min_block_size_bytes); +} +} diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h new file mode 100644 index 00000000000..d76cca60e41 --- /dev/null +++ b/src/Interpreters/Squashing.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +struct ChunksToSquash : public ChunkInfo +{ + mutable std::vector chunks = {}; +}; + +/** Merging consecutive passed blocks to specified minimum 
size. + * + * (But if one of input blocks has already at least specified size, + * then don't merge it with neighbours, even if neighbours are small.) + * + * Used to prepare blocks to adequate size for INSERT queries, + * because such storages as Memory, StripeLog, Log, TinyLog... + * store or compress data in blocks exactly as passed to it, + * and blocks of small size are not efficient. + * + * Order of data is kept. + */ + +class Squashing +{ +public: + explicit Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); + Squashing(Squashing && other) = default; + + Chunk add(Chunk && input_chunk); + static Chunk squash(Chunk && input_chunk); + Chunk flush(); + + bool isDataLeft() + { + return !chunks_to_merge_vec.empty(); + } + + Block header; +private: + struct CurrentSize + { + size_t rows = 0; + size_t bytes = 0; + }; + + std::vector chunks_to_merge_vec = {}; + size_t min_block_size_rows; + size_t min_block_size_bytes; + + CurrentSize accumulated_size; + + static const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); + + static Chunk squash(std::vector & input_chunks); + + void expandCurrentSize(size_t rows, size_t bytes); + void changeCurrentSize(size_t rows, size_t bytes); + bool isEnoughSize(size_t rows, size_t bytes) const; + + Chunk convertToChunk(std::vector && chunks) const; +}; + +} diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp deleted file mode 100644 index 41f024df7a7..00000000000 --- a/src/Interpreters/SquashingTransform.cpp +++ /dev/null @@ -1,145 +0,0 @@ -#include - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; - extern const int LOGICAL_ERROR; -} - -SquashingTransform::SquashingTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_) - : min_block_size_rows(min_block_size_rows_) - , min_block_size_bytes(min_block_size_bytes_) -{ -} - -Block SquashingTransform::add(Block && input_block) -{ - return addImpl(std::move(input_block)); -} - -Block SquashingTransform::add(const Block & input_block) -{ - return addImpl(input_block); -} - -/* - * To minimize copying, accept two types of argument: const reference for output - * stream, and rvalue reference for input stream, and decide whether to copy - * inside this function. This allows us not to copy Block unless we absolutely - * have to. - */ -template -Block SquashingTransform::addImpl(ReferenceType input_block) -{ - /// End of input stream. - if (!input_block) - { - Block to_return; - std::swap(to_return, accumulated_block); - return to_return; - } - - /// Just read block is already enough. - if (isEnoughSize(input_block)) - { - /// If no accumulated data, return just read block. - if (!accumulated_block) - { - return std::move(input_block); - } - - /// Return accumulated data (maybe it has small size) and place new block to accumulated data. - Block to_return = std::move(input_block); - std::swap(to_return, accumulated_block); - return to_return; - } - - /// Accumulated block is already enough. - if (isEnoughSize(accumulated_block)) - { - /// Return accumulated data and place new block to accumulated data. - Block to_return = std::move(input_block); - std::swap(to_return, accumulated_block); - return to_return; - } - - append(std::move(input_block)); - if (isEnoughSize(accumulated_block)) - { - Block to_return; - std::swap(to_return, accumulated_block); - return to_return; - } - - /// Squashed block is not ready. 
- return {}; -} - - -template -void SquashingTransform::append(ReferenceType input_block) -{ - if (!accumulated_block) - { - accumulated_block = std::move(input_block); - return; - } - - assert(blocksHaveEqualStructure(input_block, accumulated_block)); - - try - { - for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) - { - const auto source_column = input_block.getByPosition(i).column; - - auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); - mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); - accumulated_block.getByPosition(i).column = std::move(mutable_column); - } - } - catch (...) - { - /// add() may be called again even after a previous add() threw an exception. - /// Keep accumulated_block in a valid state. - /// Seems ok to discard accumulated data because we're throwing an exception, which the caller will - /// hopefully interpret to mean "this block and all *previous* blocks are potentially lost". - accumulated_block.clear(); - throw; - } -} - - -bool SquashingTransform::isEnoughSize(const Block & block) -{ - size_t rows = 0; - size_t bytes = 0; - - for (const auto & [column, type, name] : block) - { - if (!column) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid column in block."); - - if (!rows) - rows = column->size(); - else if (rows != column->size()) - throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Sizes of columns doesn't match"); - - bytes += column->byteSize(); - } - - return isEnoughSize(rows, bytes); -} - - -bool SquashingTransform::isEnoughSize(size_t rows, size_t bytes) const -{ - return (!min_block_size_rows && !min_block_size_bytes) - || (min_block_size_rows && rows >= min_block_size_rows) - || (min_block_size_bytes && bytes >= min_block_size_bytes); -} - -} diff --git a/src/Interpreters/SquashingTransform.h b/src/Interpreters/SquashingTransform.h deleted file mode 100644 index b04d012bcd1..00000000000 --- a/src/Interpreters/SquashingTransform.h +++ /dev/null @@ -1,50 +0,0 @@ -#pragma once - -#include - - -namespace DB -{ - - -/** Merging consecutive passed blocks to specified minimum size. - * - * (But if one of input blocks has already at least specified size, - * then don't merge it with neighbours, even if neighbours are small.) - * - * Used to prepare blocks to adequate size for INSERT queries, - * because such storages as Memory, StripeLog, Log, TinyLog... - * store or compress data in blocks exactly as passed to it, - * and blocks of small size are not efficient. - * - * Order of data is kept. - */ -class SquashingTransform -{ -public: - /// Conditions on rows and bytes are OR-ed. If one of them is zero, then corresponding condition is ignored. - SquashingTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_); - - /** Add next block and possibly returns squashed block. - * At end, you need to pass empty block. As the result for last (empty) block, you will get last Result with ready = true. 
- */ - Block add(Block && block); - Block add(const Block & block); - -private: - size_t min_block_size_rows; - size_t min_block_size_bytes; - - Block accumulated_block; - - template - Block addImpl(ReferenceType block); - - template - void append(ReferenceType block); - - bool isEnoughSize(const Block & block); - bool isEnoughSize(size_t rows, size_t bytes) const; -}; - -} diff --git a/src/Interpreters/StorageID.h b/src/Interpreters/StorageID.h index 69dac8ea32d..f9afbc7b98d 100644 --- a/src/Interpreters/StorageID.h +++ b/src/Interpreters/StorageID.h @@ -1,7 +1,6 @@ #pragma once #include #include -#include #include #include #include diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 3b25deeb59d..557065b23ff 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -24,7 +25,7 @@ #include #include #include -#include +#include #include #include #include @@ -116,6 +117,7 @@ namespace { constexpr size_t DEFAULT_METRIC_LOG_COLLECT_INTERVAL_MILLISECONDS = 1000; +constexpr size_t DEFAULT_ERROR_LOG_COLLECT_INTERVAL_MILLISECONDS = 1000; /// Creates a system log with MergeTree engine using parameters from config template @@ -286,6 +288,7 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf crash_log = createSystemLog(global_context, "system", "crash_log", config, "crash_log", "Contains information about stack traces for fatal errors. The table does not exist in the database by default, it is created only when fatal errors occur."); text_log = createSystemLog(global_context, "system", "text_log", config, "text_log", "Contains logging entries which are normally written to a log file or to stdout."); metric_log = createSystemLog(global_context, "system", "metric_log", config, "metric_log", "Contains history of metrics values from tables system.metrics and system.events, periodically flushed to disk."); + error_log = createSystemLog(global_context, "system", "error_log", config, "error_log", "Contains history of error values from table system.errors, periodically flushed to disk."); filesystem_cache_log = createSystemLog(global_context, "system", "filesystem_cache_log", config, "filesystem_cache_log", "Contains a history of all events occurred with filesystem cache for objects on a remote filesystem."); filesystem_read_prefetches_log = createSystemLog( global_context, "system", "filesystem_read_prefetches_log", config, "filesystem_read_prefetches_log", "Contains a history of all prefetches done during reading from MergeTables backed by a remote filesystem."); @@ -303,7 +306,8 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf processors_profile_log = createSystemLog(global_context, "system", "processors_profile_log", config, "processors_profile_log", "Contains profiling information on processors level (building blocks for a pipeline for query execution."); asynchronous_insert_log = createSystemLog(global_context, "system", "asynchronous_insert_log", config, "asynchronous_insert_log", "Contains a history for all asynchronous inserts executed on current server."); backup_log = createSystemLog(global_context, "system", "backup_log", config, "backup_log", "Contains logging entries with the information about BACKUP and RESTORE operations."); - s3_queue_log = createSystemLog(global_context, "system", "s3queue_log", config, "s3queue_log", "Contains logging entries with the information files processes by 
S3Queue engine."); + s3_queue_log = createSystemLog(global_context, "system", "s3queue_log", config, "s3queue_log", "Contains logging entries with the information files processes by S3Queue engine."); + azure_queue_log = createSystemLog(global_context, "system", "azure_queue_log", config, "azure_queue_log", "Contains logging entries with the information files processes by S3Queue engine."); blob_storage_log = createSystemLog(global_context, "system", "blob_storage_log", config, "blob_storage_log", "Contains logging entries with information about various blob storage operations such as uploads and deletes."); if (query_log) @@ -320,6 +324,8 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf logs.emplace_back(text_log.get()); if (metric_log) logs.emplace_back(metric_log.get()); + if (error_log) + logs.emplace_back(error_log.get()); if (asynchronous_metric_log) logs.emplace_back(asynchronous_metric_log.get()); if (opentelemetry_span_log) @@ -366,7 +372,14 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf { size_t collect_interval_milliseconds = config.getUInt64("metric_log.collect_interval_milliseconds", DEFAULT_METRIC_LOG_COLLECT_INTERVAL_MILLISECONDS); - metric_log->startCollectMetric(collect_interval_milliseconds); + metric_log->startCollect(collect_interval_milliseconds); + } + + if (error_log) + { + size_t collect_interval_milliseconds = config.getUInt64("error_log.collect_interval_milliseconds", + DEFAULT_ERROR_LOG_COLLECT_INTERVAL_MILLISECONDS); + error_log->startCollect(collect_interval_milliseconds); } if (crash_log) diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index af635ca1bdb..94cb8c3e2fd 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -40,6 +40,7 @@ class PartLog; class TextLog; class TraceLog; class CrashLog; +class ErrorLog; class MetricLog; class AsynchronousMetricLog; class OpenTelemetrySpanLog; @@ -52,7 +53,7 @@ class FilesystemCacheLog; class FilesystemReadPrefetchesLog; class AsynchronousInsertLog; class BackupLog; -class S3QueueLog; +class ObjectStorageQueueLog; class BlobStorageLog; /// System logs should be destroyed in destructor of the last Context and before tables, @@ -72,9 +73,11 @@ struct SystemLogs std::shared_ptr crash_log; /// Used to log server crashes. std::shared_ptr text_log; /// Used to log all text messages. std::shared_ptr metric_log; /// Used to log all metrics. + std::shared_ptr error_log; /// Used to log errors. std::shared_ptr filesystem_cache_log; std::shared_ptr filesystem_read_prefetches_log; - std::shared_ptr s3_queue_log; + std::shared_ptr s3_queue_log; + std::shared_ptr azure_queue_log; /// Metrics from system.asynchronous_metrics. std::shared_ptr asynchronous_metric_log; /// OpenTelemetry trace spans. diff --git a/src/Interpreters/TemporaryDataOnDisk.cpp b/src/Interpreters/TemporaryDataOnDisk.cpp index a74b5bba2b9..7f0fb8cd6ca 100644 --- a/src/Interpreters/TemporaryDataOnDisk.cpp +++ b/src/Interpreters/TemporaryDataOnDisk.cpp @@ -3,6 +3,8 @@ #include #include +#include +#include #include #include #include @@ -224,25 +226,37 @@ struct TemporaryFileStream::OutputWriter bool finalized = false; }; -TemporaryFileStream::Reader::Reader(const String & path, const Block & header_, size_t size) - : in_file_buf(path, size ? 
std::min(DBMS_DEFAULT_BUFFER_SIZE, size) : DBMS_DEFAULT_BUFFER_SIZE) - , in_compressed_buf(in_file_buf) - , in_reader(in_compressed_buf, header_, DBMS_TCP_PROTOCOL_VERSION) +TemporaryFileStream::Reader::Reader(const String & path_, const Block & header_, size_t size_) + : path(path_) + , size(size_ ? std::min(size_, DBMS_DEFAULT_BUFFER_SIZE) : DBMS_DEFAULT_BUFFER_SIZE) + , header(header_) { LOG_TEST(getLogger("TemporaryFileStream"), "Reading {} from {}", header_.dumpStructure(), path); } -TemporaryFileStream::Reader::Reader(const String & path, size_t size) - : in_file_buf(path, size ? std::min(DBMS_DEFAULT_BUFFER_SIZE, size) : DBMS_DEFAULT_BUFFER_SIZE) - , in_compressed_buf(in_file_buf) - , in_reader(in_compressed_buf, DBMS_TCP_PROTOCOL_VERSION) +TemporaryFileStream::Reader::Reader(const String & path_, size_t size_) + : path(path_) + , size(size_ ? std::min(size_, DBMS_DEFAULT_BUFFER_SIZE) : DBMS_DEFAULT_BUFFER_SIZE) { LOG_TEST(getLogger("TemporaryFileStream"), "Reading from {}", path); } Block TemporaryFileStream::Reader::read() { - return in_reader.read(); + if (!in_reader) + { + if (fs::exists(path)) + in_file_buf = std::make_unique(path, size); + else + in_file_buf = std::make_unique(); + + in_compressed_buf = std::make_unique(*in_file_buf); + if (header.has_value()) + in_reader = std::make_unique(*in_compressed_buf, header.value(), DBMS_TCP_PROTOCOL_VERSION); + else + in_reader = std::make_unique(*in_compressed_buf, DBMS_TCP_PROTOCOL_VERSION); + } + return in_reader->read(); } TemporaryFileStream::TemporaryFileStream(TemporaryFileOnDiskHolder file_, const Block & header_, TemporaryDataOnDisk * parent_) diff --git a/src/Interpreters/TemporaryDataOnDisk.h b/src/Interpreters/TemporaryDataOnDisk.h index 488eed70da9..d541c93e031 100644 --- a/src/Interpreters/TemporaryDataOnDisk.h +++ b/src/Interpreters/TemporaryDataOnDisk.h @@ -151,9 +151,13 @@ public: Block read(); - ReadBufferFromFile in_file_buf; - CompressedReadBuffer in_compressed_buf; - NativeReader in_reader; + const std::string path; + const size_t size; + const std::optional header; + + std::unique_ptr in_file_buf; + std::unique_ptr in_compressed_buf; + std::unique_ptr in_reader; }; struct Stat diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 9363e3d83eb..184c263dbdb 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -615,9 +615,9 @@ static bool decimalEqualsFloat(Field field, Float64 float_value) return decimal_to_float == float_value; } -std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType & to_type) +std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType & from_type, const IDataType & to_type) { - Field result_value = convertFieldToType(from_value, to_type); + Field result_value = convertFieldToType(from_value, to_type, &from_type); if (Field::isDecimal(from_value.getType()) && Field::isDecimal(result_value.getType())) { diff --git a/src/Interpreters/convertFieldToType.h b/src/Interpreters/convertFieldToType.h index 7f49ea5479d..4aa09f8619e 100644 --- a/src/Interpreters/convertFieldToType.h +++ b/src/Interpreters/convertFieldToType.h @@ -22,6 +22,6 @@ Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_t /// Applies stricter rules than convertFieldToType, doesn't allow loss of precision converting to Decimal. /// Returns `Field` if the conversion was successful and the result is equal to the original value, otherwise returns nullopt. 
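Editorial aside on the signature change just below: convertFieldToTypeStrict now also receives the source type, and callers still treat a nullopt result as "the value cannot be represented without loss". A minimal, hypothetical call site (variable and helper names such as field_value, source_type, target_type and useConvertedValue are invented for illustration and are not part of the patch):

    // Sketch only: strict conversion with the widened signature, assuming the declaration below.
    std::optional<Field> maybe_converted = convertFieldToTypeStrict(field_value, *source_type, *target_type);
    if (!maybe_converted)
        return {};                      // conversion would lose precision or change the value
    useConvertedValue(*maybe_converted); // hypothetical consumer of the exact converted Field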
-std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType & to_type); +std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType & from_type, const IDataType & to_type); } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 9c5436517ab..9f33cbf1c27 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -44,6 +44,7 @@ #include #include +#include #include #include #include @@ -221,6 +222,17 @@ static void logException(ContextPtr context, QueryLogElement & elem, bool log_er LOG_INFO(getLogger("executeQuery"), message); } +static void +addPrivilegesInfoToQueryLogElement(QueryLogElement & element, const ContextPtr context_ptr) +{ + const auto & privileges_info = context_ptr->getQueryPrivilegesInfo(); + { + std::lock_guard lock(privileges_info.mutex); + element.used_privileges = privileges_info.used_privileges; + element.missing_privileges = privileges_info.missing_privileges; + } +} + static void addStatusInfoToQueryLogElement(QueryLogElement & element, const QueryStatusInfo & info, const ASTPtr query_ast, const ContextPtr context_ptr) { @@ -286,6 +298,7 @@ addStatusInfoToQueryLogElement(QueryLogElement & element, const QueryStatusInfo } element.async_read_counters = context_ptr->getAsyncReadCounters(); + addPrivilegesInfoToQueryLogElement(element, context_ptr); } @@ -601,6 +614,8 @@ void logExceptionBeforeStart( elem.formatted_query = queryToString(ast); } + addPrivilegesInfoToQueryLogElement(elem, context); + // We don't calculate databases, tables and columns when the query isn't able to start elem.exception_code = getCurrentExceptionCode(); diff --git a/src/Interpreters/tests/gtest_actions_visitor.cpp b/src/Interpreters/tests/gtest_actions_visitor.cpp index 3de39ae6bfa..28e83306c53 100644 --- a/src/Interpreters/tests/gtest_actions_visitor.cpp +++ b/src/Interpreters/tests/gtest_actions_visitor.cpp @@ -31,7 +31,7 @@ TEST(ActionsVisitor, VisitLiteral) size_limits_for_set, size_t(0), name_and_types, - std::make_shared(name_and_types), + ActionsDAG(name_and_types), std::make_shared(), false /* no_subqueries */, false /* no_makeset */, @@ -39,7 +39,7 @@ TEST(ActionsVisitor, VisitLiteral) info); ActionsVisitor(visitor_data).visit(ast); auto actions = visitor_data.getActions(); - ASSERT_EQ(actions->getResultColumns().back().type->getTypeId(), expect_type->getTypeId()); + ASSERT_EQ(actions.getResultColumns().back().type->getTypeId(), expect_type->getTypeId()); } TEST(ActionsVisitor, VisitLiteralWithType) @@ -61,7 +61,7 @@ TEST(ActionsVisitor, VisitLiteralWithType) size_limits_for_set, size_t(0), name_and_types, - std::make_shared(name_and_types), + ActionsDAG(name_and_types), std::make_shared(), false /* no_subqueries */, false /* no_makeset */, @@ -69,5 +69,5 @@ TEST(ActionsVisitor, VisitLiteralWithType) info); ActionsVisitor(visitor_data).visit(ast); auto actions = visitor_data.getActions(); - ASSERT_EQ(actions->getResultColumns().back().type->getTypeId(), date_type->getTypeId()); + ASSERT_EQ(actions.getResultColumns().back().type->getTypeId(), date_type->getTypeId()); } diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index 0bd4b94d999..35b96bce42a 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -321,7 +321,12 @@ void Loggers::updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Log bool should_log_to_console = isatty(STDIN_FILENO) || isatty(STDERR_FILENO); if (config.getBool("logger.console", false) || 
(!config.hasProperty("logger.console") && !is_daemon && should_log_to_console)) - split->setLevel("console", log_level); + { + auto console_log_level_string = config.getString("logger.console_log_level", log_level_string); + auto console_log_level = Poco::Logger::parseLevel(console_log_level_string); + max_log_level = std::max(console_log_level, max_log_level); + split->setLevel("console", console_log_level); + } else split->setLevel("console", 0); diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 90b63d2ce6f..58eeb7c4cbf 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -60,6 +60,8 @@ ASTPtr ASTAlterCommand::clone() const res->settings_resets = res->children.emplace_back(settings_resets->clone()).get(); if (select) res->select = res->children.emplace_back(select->clone()).get(); + if (sql_security) + res->sql_security = res->children.emplace_back(sql_security->clone()).get(); if (rename_to) res->rename_to = res->children.emplace_back(rename_to->clone()).get(); @@ -522,6 +524,7 @@ void ASTAlterCommand::forEachPointerToChild(std::function f) f(reinterpret_cast(&settings_changes)); f(reinterpret_cast(&settings_resets)); f(reinterpret_cast(&select)); + f(reinterpret_cast(&sql_security)); f(reinterpret_cast(&rename_to)); } diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index 3e5c6a9d86e..d56a2724914 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -404,8 +404,18 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat << quoteString(toString(to_inner_uuid)); } + bool should_add_empty = is_create_empty; + auto add_empty_if_needed = [&] + { + if (!should_add_empty) + return; + should_add_empty = false; + settings.ostr << (settings.hilite ? hilite_keyword : "") << " EMPTY" << (settings.hilite ? hilite_none : ""); + }; + if (!as_table.empty()) { + add_empty_if_needed(); settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? hilite_none : "") << (!as_database.empty() ? backQuoteIfNeed(as_database) + "." : "") << backQuoteIfNeed(as_table); @@ -423,6 +433,7 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat frame.expression_list_always_start_on_new_line = false; } + add_empty_if_needed(); settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? hilite_none : ""); as_table_function->formatImpl(settings, state, frame); } @@ -484,8 +495,8 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat if (is_populate) settings.ostr << (settings.hilite ? hilite_keyword : "") << " POPULATE" << (settings.hilite ? hilite_none : ""); - else if (is_create_empty) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " EMPTY" << (settings.hilite ? 
hilite_none : ""); + + add_empty_if_needed(); if (sql_security && supportSQLSecurity() && sql_security->as().type.has_value()) { diff --git a/src/Parsers/ASTSQLSecurity.cpp b/src/Parsers/ASTSQLSecurity.cpp index d6f1c21d035..74408747290 100644 --- a/src/Parsers/ASTSQLSecurity.cpp +++ b/src/Parsers/ASTSQLSecurity.cpp @@ -7,7 +7,7 @@ namespace DB void ASTSQLSecurity::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - if (!type.has_value()) + if (!type) return; if (definer || is_definer_current_user) diff --git a/src/Parsers/Access/ASTAuthenticationData.cpp b/src/Parsers/Access/ASTAuthenticationData.cpp index 3a62480dc0c..dab93ba3de5 100644 --- a/src/Parsers/Access/ASTAuthenticationData.cpp +++ b/src/Parsers/Access/ASTAuthenticationData.cpp @@ -89,6 +89,12 @@ void ASTAuthenticationData::formatImpl(const FormatSettings & settings, FormatSt password = true; break; } + case AuthenticationType::JWT: + { + prefix = "CLAIMS"; + parameter = true; + break; + } case AuthenticationType::LDAP: { prefix = "SERVER"; diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index f0cbe42da80..93642d94880 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -250,6 +250,7 @@ namespace DB MR_MACROS(IS_NOT_NULL, "IS NOT NULL") \ MR_MACROS(IS_NULL, "IS NULL") \ MR_MACROS(JOIN, "JOIN") \ + MR_MACROS(JWT, "JWT") \ MR_MACROS(KERBEROS, "KERBEROS") \ MR_MACROS(KEY_BY, "KEY BY") \ MR_MACROS(KEY, "KEY") \ diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 7cdfaf988a3..fff8383e7b3 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -2179,7 +2179,7 @@ public: bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override { - /// kql(table|project ...) + /// kql('table|project ...') /// 0. Parse the kql query /// 1. Parse closing token if (state == 0) diff --git a/src/Parsers/FunctionSecretArgumentsFinderAST.h b/src/Parsers/FunctionSecretArgumentsFinderAST.h index 348b2ca9e3a..5b77485afb0 100644 --- a/src/Parsers/FunctionSecretArgumentsFinderAST.h +++ b/src/Parsers/FunctionSecretArgumentsFinderAST.h @@ -82,6 +82,16 @@ private: /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...) findS3FunctionSecretArguments(/* is_cluster_function= */ true); } + else if (function.name == "azureBlobStorage") + { + /// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure) + findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ false); + } + else if (function.name == "azureBlobStorageCluster") + { + /// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]) + findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ true); + } else if ((function.name == "remote") || (function.name == "remoteSecure")) { /// remote('addresses_expr', 'db', 'table', 'user', 'password', ...) @@ -169,6 +179,43 @@ private: markSecretArgument(url_arg_idx + 2); } + void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function) + { + /// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument. + size_t url_arg_idx = is_cluster_function ? 
1 : 0; + + if (!is_cluster_function && isNamedCollectionName(0)) + { + /// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...) + findSecretNamedArgument("account_key", 1); + return; + } + else if (is_cluster_function && isNamedCollectionName(1)) + { + /// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...) + findSecretNamedArgument("account_key", 2); + return; + } + + /// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used + /// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure) + /// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]) + size_t count = arguments->size(); + if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7)) + { + String second_arg; + if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg)) + { + if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) + return; /// The argument after 'url' is a format: s3('url', 'format', ...) + } + } + + /// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature + if (url_arg_idx + 4 < count) + markSecretArgument(url_arg_idx + 4); + } + void findURLSecretArguments() { if (!isNamedCollectionName(0)) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index bea1a627129..440d0c73803 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -853,7 +853,7 @@ Please note that the functions listed below only take constant parameters for no ## KQL() function - create table - `CREATE TABLE kql_table4 ENGINE = Memory AS select *, now() as new_column From kql(Customers | project LastName,Age);` + `CREATE TABLE kql_table4 ENGINE = Memory AS select *, now() as new_column From kql($$Customers | project LastName,Age$$);` verify the content of `kql_table` `select * from kql_table` @@ -867,12 +867,12 @@ Please note that the functions listed below only take constant parameters for no Age Nullable(UInt8) ) ENGINE = Memory; ``` - `INSERT INTO temp select * from kql(Customers|project FirstName,LastName,Age);` + `INSERT INTO temp select * from kql($$Customers|project FirstName,LastName,Age$$);` verify the content of `temp` `select * from temp` - - Select from kql() - `Select * from kql(Customers|project FirstName)` + - Select from kql(...) 
+ `Select * from kql($$Customers|project FirstName$$)` ## KQL operators: - Tabular expression statements @@ -993,4 +993,3 @@ Please note that the functions listed below only take constant parameters for no - dcount() - dcountif() - bin - \ No newline at end of file diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index 1d77007a37c..affce53fbc7 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -301,8 +301,8 @@ String IParserKQLFunction::kqlCallToExpression( }); const auto kql_call = std::format("{}({})", function_name, params_str); - DB::Tokens call_tokens(kql_call.c_str(), kql_call.c_str() + kql_call.length()); - DB::IParser::Pos tokens_pos(call_tokens, max_depth, max_backtracks); + Tokens call_tokens(kql_call.data(), kql_call.data() + kql_call.length(), 0, true); + IParser::Pos tokens_pos(call_tokens, max_depth, max_backtracks); return DB::IParserKQLFunction::getExpression(tokens_pos); } diff --git a/src/Parsers/Kusto/ParserKQLDistinct.cpp b/src/Parsers/Kusto/ParserKQLDistinct.cpp index 3ec823a61b5..0a4aac64720 100644 --- a/src/Parsers/Kusto/ParserKQLDistinct.cpp +++ b/src/Parsers/Kusto/ParserKQLDistinct.cpp @@ -11,7 +11,7 @@ bool ParserKQLDistinct::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) expr = getExprFromToken(pos); - Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true); IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!ParserNotEmptyExpressionList(false).parse(new_pos, select_expression_list, expected)) diff --git a/src/Parsers/Kusto/ParserKQLExtend.cpp b/src/Parsers/Kusto/ParserKQLExtend.cpp index 41ce296bd25..87a0d7b355a 100644 --- a/src/Parsers/Kusto/ParserKQLExtend.cpp +++ b/src/Parsers/Kusto/ParserKQLExtend.cpp @@ -22,7 +22,7 @@ bool ParserKQLExtend ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) String except_str; String new_extend_str; - Tokens ntokens(extend_expr.c_str(), extend_expr.c_str() + extend_expr.size()); + Tokens ntokens(extend_expr.data(), extend_expr.data() + extend_expr.size(), 0, true); IParser::Pos npos(ntokens, pos.max_depth, pos.max_backtracks); String alias; @@ -76,7 +76,7 @@ bool ParserKQLExtend ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) apply_alias(); String expr = std::format("SELECT * {}, {} from prev", except_str, new_extend_str); - Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true); IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!ParserSelectQuery().parse(new_pos, select_query, expected)) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index b060ce8d2c7..5c7c22d5b22 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -13,7 +13,7 @@ bool ParserKQLFilter::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) String expr = getExprFromToken(pos); ASTPtr where_expression; - Tokens token_filter(expr.c_str(), expr.c_str() + expr.size()); + Tokens token_filter(expr.data(), expr.data() + expr.size(), 0, true); IParser::Pos pos_filter(token_filter, pos.max_depth, pos.max_backtracks); if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, where_expression, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index 
0eb460757b1..db6f4e7f46c 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -13,7 +13,7 @@ bool ParserKQLLimit::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto expr = getExprFromToken(pos); - Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true); IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!ParserExpressionWithOptionalAlias(false).parse(new_pos, limit_length, expected)) diff --git a/src/Parsers/Kusto/ParserKQLMVExpand.cpp b/src/Parsers/Kusto/ParserKQLMVExpand.cpp index 9beb1c39e34..835d50b42ac 100644 --- a/src/Parsers/Kusto/ParserKQLMVExpand.cpp +++ b/src/Parsers/Kusto/ParserKQLMVExpand.cpp @@ -298,7 +298,7 @@ bool ParserKQLMVExpand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; const String setting_str = "enable_unaligned_array_join = 1"; - Tokens token_settings(setting_str.c_str(), setting_str.c_str() + setting_str.size()); + Tokens token_settings(setting_str.data(), setting_str.data() + setting_str.size(), 0, true); IParser::Pos pos_settings(token_settings, pos.max_depth, pos.max_backtracks); if (!ParserSetQuery(true).parse(pos_settings, setting, expected)) diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp index f1ad9d9738b..d2ec059ddba 100644 --- a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp @@ -173,7 +173,7 @@ bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr & auto date_type_cast = [&](String & src) { - Tokens tokens(src.c_str(), src.c_str() + src.size()); + Tokens tokens(src.data(), src.data() + src.size(), 0, true); IParser::Pos pos(tokens, max_depth, max_backtracks); String res; while (isValidKQLPos(pos)) @@ -200,7 +200,7 @@ bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr & auto get_group_expression_alias = [&] { std::vector group_expression_tokens; - Tokens tokens(group_expression.c_str(), group_expression.c_str() + group_expression.size()); + Tokens tokens(group_expression.data(), group_expression.data() + group_expression.size(), 0, true); IParser::Pos pos(tokens, max_depth, max_backtracks); while (isValidKQLPos(pos)) { @@ -413,7 +413,7 @@ bool ParserKQLMakeSeries ::parseImpl(Pos & pos, ASTPtr & node, Expected & expect makeSeries(kql_make_series, node, pos.max_depth, pos.max_backtracks); - Tokens token_main_query(kql_make_series.main_query.c_str(), kql_make_series.main_query.c_str() + kql_make_series.main_query.size()); + Tokens token_main_query(kql_make_series.main_query.data(), kql_make_series.main_query.data() + kql_make_series.main_query.size(), 0, true); IParser::Pos pos_main_query(token_main_query, pos.max_depth, pos.max_backtracks); if (!ParserNotEmptyExpressionList(true).parse(pos_main_query, select_expression_list, expected)) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index d7364cb5fd7..c31c8711008 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -1,20 +1,26 @@ #include #include #include -#include #include -#include #include #include #include #include #include -#include "KustoFunctions/IParserKQLFunction.h" + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} namespace { -enum class KQLOperatorValue : uint16_t +enum class KQLOperatorValue { none, between, @@ -56,7 +62,8 @@ enum class KQLOperatorValue : 
uint16_t not_startswith_cs, }; -const std::unordered_map KQLOperator = { +const std::unordered_map KQLOperator = +{ {"between", KQLOperatorValue::between}, {"!between", KQLOperatorValue::not_between}, {"contains", KQLOperatorValue::contains}, @@ -96,44 +103,37 @@ const std::unordered_map KQLOperator = { {"!startswith_cs", KQLOperatorValue::not_startswith_cs}, }; -void rebuildSubqueryForInOperator(DB::ASTPtr & node, bool useLowerCase) +void rebuildSubqueryForInOperator(ASTPtr & node, bool useLowerCase) { //A sub-query for in operator in kql can have multiple columns, but only takes the first column. //A sub-query for in operator in ClickHouse can not have multiple columns //So only take the first column if there are multiple columns. //select * not working for subquery. (a tabular statement without project) - const auto selectColumns = node->children[0]->children[0]->as()->select(); + const auto selectColumns = node->children[0]->children[0]->as()->select(); while (selectColumns->children.size() > 1) selectColumns->children.pop_back(); if (useLowerCase) { - auto args = std::make_shared(); + auto args = std::make_shared(); args->children.push_back(selectColumns->children[0]); - auto func_lower = std::make_shared(); + auto func_lower = std::make_shared(); func_lower->name = "lower"; func_lower->children.push_back(selectColumns->children[0]); func_lower->arguments = args; - if (selectColumns->children[0]->as()) - func_lower->alias = std::move(selectColumns->children[0]->as()->alias); - else if (selectColumns->children[0]->as()) - func_lower->alias = std::move(selectColumns->children[0]->as()->alias); + if (selectColumns->children[0]->as()) + func_lower->alias = std::move(selectColumns->children[0]->as()->alias); + else if (selectColumns->children[0]->as()) + func_lower->alias = std::move(selectColumns->children[0]->as()->alias); - auto funcs = std::make_shared(); + auto funcs = std::make_shared(); funcs->children.push_back(func_lower); selectColumns->children[0] = std::move(funcs); } } } -namespace DB -{ - -namespace ErrorCodes -{ - extern const int SYNTAX_ERROR; -} String KQLOperators::genHasAnyAllOpExpr(std::vector & tokens, IParser::Pos & token_pos, String kql_op, String ch_op) { @@ -166,7 +166,7 @@ String KQLOperators::genHasAnyAllOpExpr(std::vector & tokens, IParser::P return new_expr; } -String genEqOpExprCis(std::vector & tokens, DB::IParser::Pos & token_pos, const String & ch_op) +String genEqOpExprCis(std::vector & tokens, IParser::Pos & token_pos, const String & ch_op) { String tmp_arg(token_pos->begin, token_pos->end); @@ -178,30 +178,30 @@ String genEqOpExprCis(std::vector & tokens, DB::IParser::Pos & token_pos new_expr += ch_op + " "; ++token_pos; - if (token_pos->type == DB::TokenType::StringLiteral || token_pos->type == DB::TokenType::QuotedIdentifier) - new_expr += "lower('" + DB::IParserKQLFunction::escapeSingleQuotes(String(token_pos->begin + 1, token_pos->end - 1)) + "')"; + if (token_pos->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier) + new_expr += "lower('" + IParserKQLFunction::escapeSingleQuotes(String(token_pos->begin + 1, token_pos->end - 1)) + "')"; else - new_expr += "lower(" + DB::IParserKQLFunction::getExpression(token_pos) + ")"; + new_expr += "lower(" + IParserKQLFunction::getExpression(token_pos) + ")"; tokens.pop_back(); return new_expr; } -String genInOpExprCis(std::vector & tokens, DB::IParser::Pos & token_pos, const String & kql_op, const String & ch_op) +String genInOpExprCis(std::vector & tokens, IParser::Pos & 
token_pos, const String & kql_op, const String & ch_op) { - DB::ParserKQLTableFunction kqlfun_p; - DB::ParserToken s_lparen(DB::TokenType::OpeningRoundBracket); + ParserKQLTableFunction kqlfun_p; + ParserToken s_lparen(TokenType::OpeningRoundBracket); - DB::ASTPtr select; - DB::Expected expected; + ASTPtr select; + Expected expected; String new_expr; ++token_pos; if (!s_lparen.ignore(token_pos, expected)) - throw DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); if (tokens.empty()) - throw DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); new_expr = "lower(" + tokens.back() + ") "; tokens.pop_back(); @@ -218,39 +218,39 @@ String genInOpExprCis(std::vector & tokens, DB::IParser::Pos & token_pos --token_pos; new_expr += ch_op; - while (isValidKQLPos(token_pos) && token_pos->type != DB::TokenType::PipeMark && token_pos->type != DB::TokenType::Semicolon) + while (isValidKQLPos(token_pos) && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon) { auto tmp_arg = String(token_pos->begin, token_pos->end); - if (token_pos->type != DB::TokenType::Comma && token_pos->type != DB::TokenType::ClosingRoundBracket - && token_pos->type != DB::TokenType::OpeningRoundBracket && token_pos->type != DB::TokenType::OpeningSquareBracket - && token_pos->type != DB::TokenType::ClosingSquareBracket && tmp_arg != "~" && tmp_arg != "dynamic") + if (token_pos->type != TokenType::Comma && token_pos->type != TokenType::ClosingRoundBracket + && token_pos->type != TokenType::OpeningRoundBracket && token_pos->type != TokenType::OpeningSquareBracket + && token_pos->type != TokenType::ClosingSquareBracket && tmp_arg != "~" && tmp_arg != "dynamic") { - if (token_pos->type == DB::TokenType::StringLiteral || token_pos->type == DB::TokenType::QuotedIdentifier) - new_expr += "lower('" + DB::IParserKQLFunction::escapeSingleQuotes(String(token_pos->begin + 1, token_pos->end - 1)) + "')"; + if (token_pos->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier) + new_expr += "lower('" + IParserKQLFunction::escapeSingleQuotes(String(token_pos->begin + 1, token_pos->end - 1)) + "')"; else new_expr += "lower(" + tmp_arg + ")"; } else if (tmp_arg != "~" && tmp_arg != "dynamic" && tmp_arg != "[" && tmp_arg != "]") new_expr += tmp_arg; - if (token_pos->type == DB::TokenType::ClosingRoundBracket) + if (token_pos->type == TokenType::ClosingRoundBracket) break; ++token_pos; } return new_expr; } -std::string genInOpExpr(DB::IParser::Pos & token_pos, const std::string & kql_op, const std::string & ch_op) +std::string genInOpExpr(IParser::Pos & token_pos, const std::string & kql_op, const std::string & ch_op) { - DB::ParserKQLTableFunction kqlfun_p; - DB::ParserToken s_lparen(DB::TokenType::OpeningRoundBracket); + ParserKQLTableFunction kqlfun_p; + ParserToken s_lparen(TokenType::OpeningRoundBracket); - DB::ASTPtr select; - DB::Expected expected; + ASTPtr select; + Expected expected; ++token_pos; if (!s_lparen.ignore(token_pos, expected)) - throw DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); auto pos = token_pos; if (kqlfun_p.parse(pos, select, expected)) diff --git a/src/Parsers/Kusto/ParserKQLPrint.cpp b/src/Parsers/Kusto/ParserKQLPrint.cpp index 37483439f14..dceeed841b6 100644 
--- a/src/Parsers/Kusto/ParserKQLPrint.cpp +++ b/src/Parsers/Kusto/ParserKQLPrint.cpp @@ -9,7 +9,7 @@ bool ParserKQLPrint::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr select_expression_list; const String expr = getExprFromToken(pos); - Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true); IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!ParserNotEmptyExpressionList(true).parse(new_pos, select_expression_list, expected)) diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index eab9ee082c5..8542c1be734 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -11,7 +11,7 @@ bool ParserKQLProject ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) expr = getExprFromToken(pos); - Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true); IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!ParserNotEmptyExpressionList(false).parse(new_pos, select_expression_list, expected)) diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 99b2d1da890..626512b6ea1 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -37,7 +37,7 @@ bool ParserKQLBase::parseByString(String expr, ASTPtr & node, uint32_t max_depth { Expected expected; - Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true); IParser::Pos pos(tokens, max_depth, max_backtracks); return parse(pos, node, expected); } @@ -45,7 +45,7 @@ bool ParserKQLBase::parseByString(String expr, ASTPtr & node, uint32_t max_depth bool ParserKQLBase::parseSQLQueryByString(ParserPtr && parser, String & query, ASTPtr & select_node, uint32_t max_depth, uint32_t max_backtracks) { Expected expected; - Tokens token_subquery(query.c_str(), query.c_str() + query.size()); + Tokens token_subquery(query.data(), query.data() + query.size(), 0, true); IParser::Pos pos_subquery(token_subquery, max_depth, max_backtracks); if (!parser->parse(pos_subquery, select_node, expected)) return false; @@ -123,7 +123,7 @@ bool ParserKQLBase::setSubQuerySource(ASTPtr & select_query, ASTPtr & source, bo String ParserKQLBase::getExprFromToken(const String & text, uint32_t max_depth, uint32_t max_backtracks) { - Tokens tokens(text.c_str(), text.c_str() + text.size()); + Tokens tokens(text.data(), text.data() + text.size(), 0, true); IParser::Pos pos(tokens, max_depth, max_backtracks); return getExprFromToken(pos); @@ -522,7 +522,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) --last_pos; String sub_query = std::format("({})", String(operation_pos.front().second->begin, last_pos->end)); - Tokens token_subquery(sub_query.c_str(), sub_query.c_str() + sub_query.size()); + Tokens token_subquery(sub_query.data(), sub_query.data() + sub_query.size(), 0, true); IParser::Pos pos_subquery(token_subquery, pos.max_depth, pos.max_backtracks); if (!ParserKQLSubquery().parse(pos_subquery, tables, expected)) @@ -543,7 +543,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto oprator = getOperator(op_str); if (oprator) { - Tokens token_clause(op_calsue.c_str(), op_calsue.c_str() + op_calsue.size()); + Tokens token_clause(op_calsue.data(), op_calsue.data() + op_calsue.size(), 0, true); IParser::Pos 
pos_clause(token_clause, pos.max_depth, pos.max_backtracks); if (!oprator->parse(pos_clause, node, expected)) return false; @@ -576,7 +576,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!node->as()->select()) { auto expr = String("*"); - Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true); IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!std::make_unique()->parse(new_pos, node, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index 852ba50698d..98847cec2da 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -18,7 +18,7 @@ bool ParserKQLSort::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto expr = getExprFromToken(pos); - Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true); IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); auto pos_backup = new_pos; diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index e508b69bdff..9c3f35ff3dd 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -2,13 +2,13 @@ #include #include #include -#include #include #include #include #include #include + namespace DB { @@ -63,6 +63,8 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp bool ParserKQLTableFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { + /// TODO: This code is idiotic, see https://github.com/ClickHouse/ClickHouse/issues/61742 + ParserToken lparen(TokenType::OpeningRoundBracket); ASTPtr string_literal; @@ -101,13 +103,16 @@ bool ParserKQLTableFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expe ++pos; } - Tokens token_kql(kql_statement.data(), kql_statement.data() + kql_statement.size()); - IParser::Pos pos_kql(token_kql, pos.max_depth, pos.max_backtracks); + Tokens tokens_kql(kql_statement.data(), kql_statement.data() + kql_statement.size(), 0, true); + IParser::Pos pos_kql(tokens_kql, pos.max_depth, pos.max_backtracks); + Expected kql_expected; kql_expected.enable_highlighting = false; if (!ParserKQLWithUnionQuery().parse(pos_kql, node, kql_expected)) return false; + ++pos; return true; } + } diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h index fe9b9adfa2a..b1cd782d36b 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.h +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -45,7 +45,7 @@ protected: class ParserKQLTableFunction : public IParserBase { protected: - const char * getName() const override { return "KQL() function"; } + const char * getName() const override { return "KQL function"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 47d706d0b4b..c26115c22b8 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -194,7 +194,7 @@ bool ParserKQLSummarize::parseImpl(Pos & pos, ASTPtr & node, Expected & expected String converted_columns = getExprFromToken(expr_columns, pos.max_depth, pos.max_backtracks); - Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); + Tokens token_converted_columns(converted_columns.data(), converted_columns.data() 
+ converted_columns.size(), 0, true); IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth, pos.max_backtracks); if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, select_expression_list, expected)) @@ -206,7 +206,7 @@ bool ParserKQLSummarize::parseImpl(Pos & pos, ASTPtr & node, Expected & expected { String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth, pos.max_backtracks); - Tokens token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size()); + Tokens token_converted_groupby(converted_groupby.data(), converted_groupby.data() + converted_groupby.size(), 0, true); IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth, pos.max_backtracks); if (!ParserNotEmptyExpressionList(false).parse(postoken_converted_groupby, group_expression_list, expected)) diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 5f6df33176f..bb37491a366 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -213,6 +213,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E return res; }; + /// Keep this list of keywords in sync with ParserDataType::parseImpl(). if (!null_check_without_moving() && !s_default.checkWithoutMoving(pos, expected) && !s_materialized.checkWithoutMoving(pos, expected) diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index b5bc9f89990..ad33c7e4558 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -103,12 +104,28 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; tryGetIdentifierNameInto(identifier, type_name); - /// Don't accept things like Array(`x.y`). + /// When parsing we accept quoted type names (e.g. `UInt64`), but when formatting we print them + /// unquoted (e.g. UInt64). This introduces problems when the string in the quotes is garbage: + /// * Array(`x.y`) -> Array(x.y) -> fails to parse + /// * `Null` -> Null -> parses as keyword instead of type name + /// Here we check for these cases and reject. if (!std::all_of(type_name.begin(), type_name.end(), [](char c) { return isWordCharASCII(c) || c == '$'; })) { expected.add(pos, "type name"); return false; } + /// Keywords that IParserColumnDeclaration recognizes before the type name. + /// E.g. reject CREATE TABLE a (x `Null`) because in "x Null" the Null would be parsed as + /// column attribute rather than type name. 
+ { + String n = type_name; + boost::to_upper(n); + if (n == "NOT" || n == "NULL" || n == "DEFAULT" || n == "MATERIALIZED" || n == "EPHEMERAL" || n == "ALIAS" || n == "AUTO" || n == "PRIMARY" || n == "COMMENT" || n == "CODEC") + { + expected.add(pos, "type name"); + return false; + } + } String type_name_upper = Poco::toUpper(type_name); String type_name_suffix; diff --git a/src/Parsers/TokenIterator.h b/src/Parsers/TokenIterator.h index 207ddadb8bf..0d18ee5439e 100644 --- a/src/Parsers/TokenIterator.h +++ b/src/Parsers/TokenIterator.h @@ -21,6 +21,7 @@ class Tokens { private: std::vector data; + size_t max_pos = 0; Lexer lexer; bool skip_insignificant; @@ -35,10 +36,16 @@ public: while (true) { if (index < data.size()) + { + max_pos = std::max(max_pos, index); return data[index]; + } if (!data.empty() && data.back().isEnd()) + { + max_pos = data.size() - 1; return data.back(); + } Token token = lexer.nextToken(); @@ -51,7 +58,12 @@ public: { if (data.empty()) return (*this)[0]; - return data.back(); + return data[max_pos]; + } + + void reset() + { + max_pos = 0; } }; diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp index 41c51267496..fab5dac8f87 100644 --- a/src/Parsers/parseQuery.cpp +++ b/src/Parsers/parseQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -285,6 +286,33 @@ ASTPtr tryParseQuery( } Expected expected; + + /** A shortcut - if Lexer found invalid tokens, fail early without full parsing. + * But there are certain cases when invalid tokens are permitted: + * 1. INSERT queries can have arbitrary data after the FORMAT clause, that is parsed by a different parser. + * 2. It can also be the case when there are multiple queries separated by semicolons, and the first queries are ok + * while subsequent queries have syntax errors. + * + * This shortcut is needed to avoid complex backtracking in case of obviously erroneous queries. + */ + IParser::Pos lookahead(token_iterator); + if (!ParserKeyword(Keyword::INSERT_INTO).ignore(lookahead)) + { + while (lookahead->type != TokenType::Semicolon && lookahead->type != TokenType::EndOfStream) + { + if (lookahead->isError()) + { + out_error_message = getLexicalErrorMessage(query_begin, all_queries_end, *lookahead, hilite, query_description); + return nullptr; + } + + ++lookahead; + } + + /// We should not spoil the info about maximum parsed position in the original iterator. 
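For readers skimming this hunk: the shortcut described in the comment above only peeks ahead in the lexer output and bails out before the real parse if an error token is seen. A compact sketch of that early-exit check, using only the token-iteration calls visible in this hunk but with a hypothetical helper name (the hunk itself then continues by resetting the position tracker):

    // Editorial sketch, not part of the patch: walk tokens up to the next ';' or end of stream
    // and report whether the lexer produced an error token on the way.
    static bool hasLexicalErrorBeforeSemicolon(IParser::Pos lookahead)
    {
        while (lookahead->type != TokenType::Semicolon && lookahead->type != TokenType::EndOfStream)
        {
            if (lookahead->isError())
                return true;
            ++lookahead;
        }
        return false;
    }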
+ tokens.reset(); + } + ASTPtr res; const bool parse_res = parser.parse(token_iterator, res, expected); const auto last_token = token_iterator.max(); diff --git a/src/Planner/ActionsChain.cpp b/src/Planner/ActionsChain.cpp index c5438b5d2d4..1b594c5f2a1 100644 --- a/src/Planner/ActionsChain.cpp +++ b/src/Planner/ActionsChain.cpp @@ -11,7 +11,7 @@ namespace DB { -ActionsChainStep::ActionsChainStep(ActionsDAGPtr actions_, +ActionsChainStep::ActionsChainStep(ActionsAndProjectInputsFlagPtr actions_, bool use_actions_nodes_as_output_columns_, ColumnsWithTypeAndName additional_output_columns_) : actions(std::move(actions_)) @@ -28,12 +28,12 @@ void ActionsChainStep::finalizeInputAndOutputColumns(const NameSet & child_input auto child_input_columns_copy = child_input_columns; std::unordered_set output_nodes_names; - output_nodes_names.reserve(actions->getOutputs().size()); + output_nodes_names.reserve(actions->dag.getOutputs().size()); - for (auto & output_node : actions->getOutputs()) + for (auto & output_node : actions->dag.getOutputs()) output_nodes_names.insert(output_node->result_name); - for (const auto & node : actions->getNodes()) + for (const auto & node : actions->dag.getNodes()) { auto it = child_input_columns_copy.find(node.result_name); if (it == child_input_columns_copy.end()) @@ -45,20 +45,20 @@ void ActionsChainStep::finalizeInputAndOutputColumns(const NameSet & child_input if (output_nodes_names.contains(node.result_name)) continue; - actions->getOutputs().push_back(&node); + actions->dag.getOutputs().push_back(&node); output_nodes_names.insert(node.result_name); } - actions->removeUnusedActions(); + actions->dag.removeUnusedActions(); /// TODO: Analyzer fix ActionsDAG input and constant nodes with same name - actions->projectInput(); + actions->project_input = true; initialize(); } void ActionsChainStep::dump(WriteBuffer & buffer) const { buffer << "DAG" << '\n'; - buffer << actions->dumpDAG(); + buffer << actions->dag.dumpDAG(); if (!available_output_columns.empty()) { @@ -84,7 +84,7 @@ String ActionsChainStep::dump() const void ActionsChainStep::initialize() { - auto required_columns_names = actions->getRequiredColumnsNames(); + auto required_columns_names = actions->dag.getRequiredColumnsNames(); input_columns_names = NameSet(required_columns_names.begin(), required_columns_names.end()); available_output_columns.clear(); @@ -93,7 +93,7 @@ void ActionsChainStep::initialize() { std::unordered_set available_output_columns_names; - for (const auto & node : actions->getNodes()) + for (const auto & node : actions->dag.getNodes()) { if (available_output_columns_names.contains(node.result_name)) continue; diff --git a/src/Planner/ActionsChain.h b/src/Planner/ActionsChain.h index 4907fdbad87..3bce19786e6 100644 --- a/src/Planner/ActionsChain.h +++ b/src/Planner/ActionsChain.h @@ -48,18 +48,18 @@ public: * If use_actions_nodes_as_output_columns = true output columns are initialized using actions dag nodes. * If additional output columns are specified they are added to output columns. 
*/ - explicit ActionsChainStep(ActionsDAGPtr actions_, + explicit ActionsChainStep(ActionsAndProjectInputsFlagPtr actions_, bool use_actions_nodes_as_output_columns = true, ColumnsWithTypeAndName additional_output_columns_ = {}); /// Get actions - ActionsDAGPtr & getActions() + ActionsAndProjectInputsFlagPtr & getActions() { return actions; } /// Get actions - const ActionsDAGPtr & getActions() const + const ActionsAndProjectInputsFlagPtr & getActions() const { return actions; } @@ -98,7 +98,7 @@ public: private: void initialize(); - ActionsDAGPtr actions; + ActionsAndProjectInputsFlagPtr actions; bool use_actions_nodes_as_output_columns = true; diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp index 27b5909c13b..d5e39a9f123 100644 --- a/src/Planner/CollectTableExpressionData.cpp +++ b/src/Planner/CollectTableExpressionData.cpp @@ -90,7 +90,7 @@ public: ActionsDAGPtr alias_column_actions_dag = std::make_shared(); PlannerActionsVisitor actions_visitor(planner_context, false); - auto outputs = actions_visitor.visit(alias_column_actions_dag, column_node->getExpression()); + auto outputs = actions_visitor.visit(*alias_column_actions_dag, column_node->getExpression()); if (outputs.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected single output in actions dag for alias column {}. Actual {}", column_node->dumpTree(), outputs.size()); @@ -340,7 +340,7 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr QueryTreeNodePtr query_tree_node = query_node_typed.getPrewhere(); PlannerActionsVisitor visitor(planner_context, false /*use_column_identifier_as_action_node_name*/); - auto expression_nodes = visitor.visit(prewhere_actions_dag, query_tree_node); + auto expression_nodes = visitor.visit(*prewhere_actions_dag, query_tree_node); if (expression_nodes.size() != 1) throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "Invalid PREWHERE. Expected single boolean expression. 
In query {}", diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 15b92ed12da..2d42ed73223 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -329,12 +329,16 @@ public: }; void addExpressionStep(QueryPlan & query_plan, - const ActionsDAGPtr & expression_actions, + const ActionsAndProjectInputsFlagPtr & expression_actions, const std::string & step_description, std::vector & result_actions_to_execute) { - result_actions_to_execute.push_back(expression_actions); - auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), expression_actions); + auto actions = expression_actions->dag.clone(); + if (expression_actions->project_input) + actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + + result_actions_to_execute.push_back(actions); + auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), actions); expression_step->setStepDescription(step_description); query_plan.addStep(std::move(expression_step)); } @@ -344,9 +348,13 @@ void addFilterStep(QueryPlan & query_plan, const std::string & step_description, std::vector & result_actions_to_execute) { - result_actions_to_execute.push_back(filter_analysis_result.filter_actions); + auto actions = filter_analysis_result.filter_actions->dag.clone(); + if (filter_analysis_result.filter_actions->project_input) + actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + + result_actions_to_execute.push_back(actions); auto where_step = std::make_unique(query_plan.getCurrentDataStream(), - filter_analysis_result.filter_actions, + actions, filter_analysis_result.filter_column_name, filter_analysis_result.remove_filter_column); where_step->setStepDescription(step_description); @@ -545,14 +553,21 @@ void addTotalsHavingStep(QueryPlan & query_plan, const auto & having_analysis_result = expression_analysis_result.getHaving(); bool need_finalize = !query_node.isGroupByWithRollup() && !query_node.isGroupByWithCube(); + ActionsDAGPtr actions; if (having_analysis_result.filter_actions) - result_actions_to_execute.push_back(having_analysis_result.filter_actions); + { + actions = having_analysis_result.filter_actions->dag.clone(); + if (having_analysis_result.filter_actions->project_input) + actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + + result_actions_to_execute.push_back(actions); + } auto totals_having_step = std::make_unique( query_plan.getCurrentDataStream(), aggregation_analysis_result.aggregate_descriptions, query_analysis_result.aggregate_overflow_row, - having_analysis_result.filter_actions, + actions, having_analysis_result.filter_column_name, having_analysis_result.remove_filter_column, settings.totals_mode, @@ -728,12 +743,12 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan, auto & interpolate_node_typed = interpolate_node->as(); PlannerActionsVisitor planner_actions_visitor(planner_context); - auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(interpolate_actions_dag, + auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(*interpolate_actions_dag, interpolate_node_typed.getExpression()); if (expression_to_interpolate_expression_nodes.size() != 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression to interpolate expected to have single action node"); - auto interpolate_expression_nodes = planner_actions_visitor.visit(interpolate_actions_dag, + auto interpolate_expression_nodes = 
planner_actions_visitor.visit(*interpolate_actions_dag, interpolate_node_typed.getInterpolateExpression()); if (interpolate_expression_nodes.size() != 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Interpolate expression expected to have single action node"); diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 837307ba2ca..7a12d5d690d 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -413,11 +413,11 @@ private: class ActionsScopeNode { public: - explicit ActionsScopeNode(ActionsDAGPtr actions_dag_, QueryTreeNodePtr scope_node_) - : actions_dag(std::move(actions_dag_)) + explicit ActionsScopeNode(ActionsDAG & actions_dag_, QueryTreeNodePtr scope_node_) + : actions_dag(actions_dag_) , scope_node(std::move(scope_node_)) { - for (const auto & node : actions_dag->getNodes()) + for (const auto & node : actions_dag.getNodes()) node_name_to_node[node.result_name] = &node; } @@ -456,7 +456,7 @@ public: throw Exception(ErrorCodes::LOGICAL_ERROR, "No node with name {}. There are only nodes {}", node_name, - actions_dag->dumpNames()); + actions_dag.dumpNames()); return it->second; } @@ -467,7 +467,7 @@ public: if (it != node_name_to_node.end()) return it->second; - const auto * node = &actions_dag->addInput(node_name, column_type); + const auto * node = &actions_dag.addInput(node_name, column_type); node_name_to_node[node->result_name] = node; return node; @@ -479,7 +479,7 @@ public: if (it != node_name_to_node.end()) return it->second; - const auto * node = &actions_dag->addInput(column); + const auto * node = &actions_dag.addInput(column); node_name_to_node[node->result_name] = node; return node; @@ -491,7 +491,7 @@ public: if (it != node_name_to_node.end()) return it->second; - const auto * node = &actions_dag->addColumn(column); + const auto * node = &actions_dag.addColumn(column); node_name_to_node[node->result_name] = node; return node; @@ -504,7 +504,7 @@ public: if (it != node_name_to_node.end()) return it->second; - const auto * node = &actions_dag->addFunction(function, children, node_name); + const auto * node = &actions_dag.addFunction(function, children, node_name); node_name_to_node[node->result_name] = node; return node; @@ -516,7 +516,7 @@ public: if (it != node_name_to_node.end()) return it->second; - const auto * node = &actions_dag->addArrayJoin(*child, node_name); + const auto * node = &actions_dag.addArrayJoin(*child, node_name); node_name_to_node[node->result_name] = node; return node; @@ -524,14 +524,14 @@ public: private: std::unordered_map node_name_to_node; - ActionsDAGPtr actions_dag; + ActionsDAG & actions_dag; QueryTreeNodePtr scope_node; }; class PlannerActionsVisitorImpl { public: - PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag, + PlannerActionsVisitorImpl(ActionsDAG & actions_dag, const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_); @@ -595,14 +595,14 @@ private: bool use_column_identifier_as_action_node_name; }; -PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag, +PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAG & actions_dag, const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_) : planner_context(planner_context_) , action_node_name_helper(node_to_node_name, *planner_context, use_column_identifier_as_action_node_name_) , use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_) { - 
actions_stack.emplace_back(std::move(actions_dag), nullptr); + actions_stack.emplace_back(actions_dag, nullptr); } ActionsDAG::NodeRawConstPtrs PlannerActionsVisitorImpl::visit(QueryTreeNodePtr expression_node) @@ -758,7 +758,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi } auto lambda_actions_dag = std::make_shared(); - actions_stack.emplace_back(lambda_actions_dag, node); + actions_stack.emplace_back(*lambda_actions_dag, node); auto [lambda_expression_node_name, levels] = visitImpl(lambda_node.getExpression()); lambda_actions_dag->getOutputs().push_back(actions_stack.back().getNodeOrThrow(lambda_expression_node_name)); @@ -886,7 +886,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi for (const auto & argument : function_node.getArguments()) { - auto index_hint_argument_expression_dag_nodes = actions_visitor.visit(index_hint_actions_dag, argument); + auto index_hint_argument_expression_dag_nodes = actions_visitor.visit(*index_hint_actions_dag, argument); for (auto & expression_dag_node : index_hint_argument_expression_dag_nodes) { @@ -1013,7 +1013,7 @@ PlannerActionsVisitor::PlannerActionsVisitor(const PlannerContextPtr & planner_c , use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_) {} -ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAGPtr actions_dag, QueryTreeNodePtr expression_node) +ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAG & actions_dag, QueryTreeNodePtr expression_node) { PlannerActionsVisitorImpl actions_visitor_impl(actions_dag, planner_context, use_column_identifier_as_action_node_name); return actions_visitor_impl.visit(expression_node); diff --git a/src/Planner/PlannerActionsVisitor.h b/src/Planner/PlannerActionsVisitor.h index 8506c309171..6bb32047327 100644 --- a/src/Planner/PlannerActionsVisitor.h +++ b/src/Planner/PlannerActionsVisitor.h @@ -37,7 +37,7 @@ public: * Necessary actions are not added in actions dag output. * Returns query tree expression node actions dag nodes. 
*/ - ActionsDAG::NodeRawConstPtrs visit(ActionsDAGPtr actions_dag, QueryTreeNodePtr expression_node); + ActionsDAG::NodeRawConstPtrs visit(ActionsDAG & actions_dag, QueryTreeNodePtr expression_node); private: const PlannerContextPtr planner_context; diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index 060bbba1c05..ceb506d1bbb 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -45,8 +45,10 @@ std::optional analyzeFilter(const QueryTreeNodePtr & filte { FilterAnalysisResult result; - result.filter_actions = buildActionsDAGFromExpressionNode(filter_expression_node, input_columns, planner_context); - const auto * output = result.filter_actions->getOutputs().at(0); + result.filter_actions = std::make_shared(); + result.filter_actions->dag = buildActionsDAGFromExpressionNode(filter_expression_node, input_columns, planner_context); + + const auto * output = result.filter_actions->dag.getOutputs().at(0); if (output->column && ConstantFilterDescription(*output->column).always_true) return {}; @@ -116,8 +118,9 @@ std::optional analyzeAggregation(const QueryTreeNodeP Names aggregation_keys; - ActionsDAGPtr before_aggregation_actions = std::make_shared(input_columns); - before_aggregation_actions->getOutputs().clear(); + ActionsAndProjectInputsFlagPtr before_aggregation_actions = std::make_shared(); + before_aggregation_actions->dag = ActionsDAG(input_columns); + before_aggregation_actions->dag.getOutputs().clear(); std::unordered_set before_aggregation_actions_output_node_names; @@ -152,7 +155,7 @@ std::optional analyzeAggregation(const QueryTreeNodeP if (constant_key && !aggregates_descriptions.empty() && (!check_constants_for_group_by_key || canRemoveConstantFromGroupByKey(*constant_key))) continue; - auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions, grouping_set_key_node); + auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions->dag, grouping_set_key_node); aggregation_keys.reserve(expression_dag_nodes.size()); for (auto & expression_dag_node : expression_dag_nodes) @@ -165,7 +168,7 @@ std::optional analyzeAggregation(const QueryTreeNodeP auto column_after_aggregation = group_by_use_nulls && expression_dag_node->column != nullptr ? 
makeNullableSafe(expression_dag_node->column) : expression_dag_node->column; available_columns_after_aggregation.emplace_back(std::move(column_after_aggregation), expression_type_after_aggregation, expression_dag_node->result_name); aggregation_keys.push_back(expression_dag_node->result_name); - before_aggregation_actions->getOutputs().push_back(expression_dag_node); + before_aggregation_actions->dag.getOutputs().push_back(expression_dag_node); before_aggregation_actions_output_node_names.insert(expression_dag_node->result_name); } } @@ -204,7 +207,7 @@ std::optional analyzeAggregation(const QueryTreeNodeP if (constant_key && !aggregates_descriptions.empty() && (!check_constants_for_group_by_key || canRemoveConstantFromGroupByKey(*constant_key))) continue; - auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions, group_by_key_node); + auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions->dag, group_by_key_node); aggregation_keys.reserve(expression_dag_nodes.size()); for (auto & expression_dag_node : expression_dag_nodes) @@ -216,7 +219,7 @@ std::optional analyzeAggregation(const QueryTreeNodeP auto column_after_aggregation = group_by_use_nulls && expression_dag_node->column != nullptr ? makeNullableSafe(expression_dag_node->column) : expression_dag_node->column; available_columns_after_aggregation.emplace_back(std::move(column_after_aggregation), expression_type_after_aggregation, expression_dag_node->result_name); aggregation_keys.push_back(expression_dag_node->result_name); - before_aggregation_actions->getOutputs().push_back(expression_dag_node); + before_aggregation_actions->dag.getOutputs().push_back(expression_dag_node); before_aggregation_actions_output_node_names.insert(expression_dag_node->result_name); } } @@ -230,13 +233,13 @@ std::optional analyzeAggregation(const QueryTreeNodeP auto & aggregate_function_node_typed = aggregate_function_node->as(); for (const auto & aggregate_function_node_argument : aggregate_function_node_typed.getArguments().getNodes()) { - auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions, aggregate_function_node_argument); + auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions->dag, aggregate_function_node_argument); for (auto & expression_dag_node : expression_dag_nodes) { if (before_aggregation_actions_output_node_names.contains(expression_dag_node->result_name)) continue; - before_aggregation_actions->getOutputs().push_back(expression_dag_node); + before_aggregation_actions->dag.getOutputs().push_back(expression_dag_node); before_aggregation_actions_output_node_names.insert(expression_dag_node->result_name); } } @@ -283,8 +286,9 @@ std::optional analyzeWindow(const QueryTreeNodePtr & query PlannerActionsVisitor actions_visitor(planner_context); - ActionsDAGPtr before_window_actions = std::make_shared(input_columns); - before_window_actions->getOutputs().clear(); + ActionsAndProjectInputsFlagPtr before_window_actions = std::make_shared(); + before_window_actions->dag = ActionsDAG(input_columns); + before_window_actions->dag.getOutputs().clear(); std::unordered_set before_window_actions_output_node_names; @@ -293,25 +297,25 @@ std::optional analyzeWindow(const QueryTreeNodePtr & query auto & window_function_node_typed = window_function_node->as(); auto & window_node = window_function_node_typed.getWindowNode()->as(); - auto expression_dag_nodes = actions_visitor.visit(before_window_actions, window_function_node_typed.getArgumentsNode()); + auto 
expression_dag_nodes = actions_visitor.visit(before_window_actions->dag, window_function_node_typed.getArgumentsNode()); for (auto & expression_dag_node : expression_dag_nodes) { if (before_window_actions_output_node_names.contains(expression_dag_node->result_name)) continue; - before_window_actions->getOutputs().push_back(expression_dag_node); + before_window_actions->dag.getOutputs().push_back(expression_dag_node); before_window_actions_output_node_names.insert(expression_dag_node->result_name); } - expression_dag_nodes = actions_visitor.visit(before_window_actions, window_node.getPartitionByNode()); + expression_dag_nodes = actions_visitor.visit(before_window_actions->dag, window_node.getPartitionByNode()); for (auto & expression_dag_node : expression_dag_nodes) { if (before_window_actions_output_node_names.contains(expression_dag_node->result_name)) continue; - before_window_actions->getOutputs().push_back(expression_dag_node); + before_window_actions->dag.getOutputs().push_back(expression_dag_node); before_window_actions_output_node_names.insert(expression_dag_node->result_name); } @@ -322,14 +326,14 @@ std::optional analyzeWindow(const QueryTreeNodePtr & query for (auto & sort_node : order_by_node_list.getNodes()) { auto & sort_node_typed = sort_node->as(); - expression_dag_nodes = actions_visitor.visit(before_window_actions, sort_node_typed.getExpression()); + expression_dag_nodes = actions_visitor.visit(before_window_actions->dag, sort_node_typed.getExpression()); for (auto & expression_dag_node : expression_dag_nodes) { if (before_window_actions_output_node_names.contains(expression_dag_node->result_name)) continue; - before_window_actions->getOutputs().push_back(expression_dag_node); + before_window_actions->dag.getOutputs().push_back(expression_dag_node); before_window_actions_output_node_names.insert(expression_dag_node->result_name); } } @@ -362,7 +366,8 @@ ProjectionAnalysisResult analyzeProjection(const QueryNode & query_node, const PlannerContextPtr & planner_context, ActionsChain & actions_chain) { - auto projection_actions = buildActionsDAGFromExpressionNode(query_node.getProjectionNode(), input_columns, planner_context); + auto projection_actions = std::make_shared(); + projection_actions->dag = buildActionsDAGFromExpressionNode(query_node.getProjectionNode(), input_columns, planner_context); auto projection_columns = query_node.getProjectionColumns(); size_t projection_columns_size = projection_columns.size(); @@ -371,7 +376,7 @@ ProjectionAnalysisResult analyzeProjection(const QueryNode & query_node, NamesWithAliases projection_column_names_with_display_aliases; projection_column_names_with_display_aliases.reserve(projection_columns_size); - auto & projection_actions_outputs = projection_actions->getOutputs(); + auto & projection_actions_outputs = projection_actions->dag.getOutputs(); size_t projection_outputs_size = projection_actions_outputs.size(); if (projection_columns_size != projection_outputs_size) @@ -409,8 +414,9 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, const PlannerContextPtr & planner_context, ActionsChain & actions_chain) { - ActionsDAGPtr before_sort_actions = std::make_shared(input_columns); - auto & before_sort_actions_outputs = before_sort_actions->getOutputs(); + auto before_sort_actions = std::make_shared(); + before_sort_actions->dag = ActionsDAG(input_columns); + auto & before_sort_actions_outputs = before_sort_actions->dag.getOutputs(); before_sort_actions_outputs.clear(); PlannerActionsVisitor 
actions_visitor(planner_context); @@ -424,7 +430,7 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, for (const auto & sort_node : order_by_node_list.getNodes()) { auto & sort_node_typed = sort_node->as(); - auto expression_dag_nodes = actions_visitor.visit(before_sort_actions, sort_node_typed.getExpression()); + auto expression_dag_nodes = actions_visitor.visit(before_sort_actions->dag, sort_node_typed.getExpression()); has_with_fill |= sort_node_typed.withFill(); for (auto & action_dag_node : expression_dag_nodes) @@ -440,7 +446,7 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, if (has_with_fill) { for (auto & output_node : before_sort_actions_outputs) - output_node = &before_sort_actions->materializeNode(*output_node); + output_node = &before_sort_actions->dag.materializeNode(*output_node); } /// We add only INPUT columns necessary for INTERPOLATE expression in before ORDER BY actions DAG @@ -449,7 +455,7 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, auto & interpolate_list_node = query_node.getInterpolate()->as(); PlannerActionsVisitor interpolate_actions_visitor(planner_context); - auto interpolate_actions_dag = std::make_shared(); + ActionsDAG interpolate_actions_dag; for (auto & interpolate_node : interpolate_list_node.getNodes()) { @@ -458,10 +464,10 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, } std::unordered_map before_sort_actions_inputs_name_to_node; - for (const auto & node : before_sort_actions->getInputs()) + for (const auto & node : before_sort_actions->dag.getInputs()) before_sort_actions_inputs_name_to_node.emplace(node->result_name, node); - for (const auto & node : interpolate_actions_dag->getNodes()) + for (const auto & node : interpolate_actions_dag.getNodes()) { if (before_sort_actions_dag_output_node_names.contains(node.result_name) || node.type != ActionsDAG::ActionType::INPUT) @@ -471,7 +477,7 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, if (input_node_it == before_sort_actions_inputs_name_to_node.end()) { auto input_column = ColumnWithTypeAndName{node.column, node.result_type, node.result_name}; - const auto * input_node = &before_sort_actions->addInput(std::move(input_column)); + const auto * input_node = &before_sort_actions->dag.addInput(std::move(input_column)); auto [it, _] = before_sort_actions_inputs_name_to_node.emplace(node.result_name, input_node); input_node_it = it; } @@ -496,22 +502,23 @@ LimitByAnalysisResult analyzeLimitBy(const QueryNode & query_node, const NameSet & required_output_nodes_names, ActionsChain & actions_chain) { - auto before_limit_by_actions = buildActionsDAGFromExpressionNode(query_node.getLimitByNode(), input_columns, planner_context); + auto before_limit_by_actions = std::make_shared(); + before_limit_by_actions->dag = buildActionsDAGFromExpressionNode(query_node.getLimitByNode(), input_columns, planner_context); NameSet limit_by_column_names_set; Names limit_by_column_names; - limit_by_column_names.reserve(before_limit_by_actions->getOutputs().size()); - for (auto & output_node : before_limit_by_actions->getOutputs()) + limit_by_column_names.reserve(before_limit_by_actions->dag.getOutputs().size()); + for (auto & output_node : before_limit_by_actions->dag.getOutputs()) { limit_by_column_names_set.insert(output_node->result_name); limit_by_column_names.push_back(output_node->result_name); } - for (const auto & node : before_limit_by_actions->getNodes()) + for (const auto & node : before_limit_by_actions->dag.getNodes()) { if 
(required_output_nodes_names.contains(node.result_name) && !limit_by_column_names_set.contains(node.result_name)) - before_limit_by_actions->getOutputs().push_back(&node); + before_limit_by_actions->dag.getOutputs().push_back(&node); } auto actions_step_before_limit_by = std::make_unique(before_limit_by_actions); @@ -605,7 +612,7 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo if (sort_analysis_result_optional.has_value() && planner_query_processing_info.isFirstStage() && planner_query_processing_info.getToStage() != QueryProcessingStage::Complete) { const auto & before_order_by_actions = sort_analysis_result_optional->before_order_by_actions; - for (const auto & output_node : before_order_by_actions->getOutputs()) + for (const auto & output_node : before_order_by_actions->dag.getOutputs()) required_output_nodes_names.insert(output_node->result_name); } @@ -661,8 +668,10 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo } } - auto project_names_actions = std::make_shared(project_names_input); - project_names_actions->project(projection_analysis_result.projection_column_names_with_display_aliases); + auto project_names_actions = std::make_shared(); + project_names_actions->dag = ActionsDAG(project_names_input); + project_names_actions->dag.project(projection_analysis_result.projection_column_names_with_display_aliases); + project_names_actions->project_input = true; actions_chain.addStep(std::make_unique(project_names_actions)); actions_chain.finalize(); diff --git a/src/Planner/PlannerExpressionAnalysis.h b/src/Planner/PlannerExpressionAnalysis.h index 0773272e49a..820df7131a7 100644 --- a/src/Planner/PlannerExpressionAnalysis.h +++ b/src/Planner/PlannerExpressionAnalysis.h @@ -17,22 +17,22 @@ namespace DB struct ProjectionAnalysisResult { - ActionsDAGPtr projection_actions; + ActionsAndProjectInputsFlagPtr projection_actions; Names projection_column_names; NamesWithAliases projection_column_names_with_display_aliases; - ActionsDAGPtr project_names_actions; + ActionsAndProjectInputsFlagPtr project_names_actions; }; struct FilterAnalysisResult { - ActionsDAGPtr filter_actions; + ActionsAndProjectInputsFlagPtr filter_actions; std::string filter_column_name; bool remove_filter_column = false; }; struct AggregationAnalysisResult { - ActionsDAGPtr before_aggregation_actions; + ActionsAndProjectInputsFlagPtr before_aggregation_actions; Names aggregation_keys; AggregateDescriptions aggregate_descriptions; GroupingSetsParamsList grouping_sets_parameters_list; @@ -41,19 +41,19 @@ struct AggregationAnalysisResult struct WindowAnalysisResult { - ActionsDAGPtr before_window_actions; + ActionsAndProjectInputsFlagPtr before_window_actions; std::vector window_descriptions; }; struct SortAnalysisResult { - ActionsDAGPtr before_order_by_actions; + ActionsAndProjectInputsFlagPtr before_order_by_actions; bool has_with_fill = false; }; struct LimitByAnalysisResult { - ActionsDAGPtr before_limit_by_actions; + ActionsAndProjectInputsFlagPtr before_limit_by_actions; Names limit_by_column_names; }; diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 6ec460b0894..e163672966a 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -693,14 +693,14 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (select_query_info.local_storage_limits.local_limits.size_limits.max_rows != 0) { if (max_block_size_limited < 
select_query_info.local_storage_limits.local_limits.size_limits.max_rows) - table_expression_query_info.limit = max_block_size_limited; + table_expression_query_info.trivial_limit = max_block_size_limited; /// Ask to read just enough rows to make the max_rows limit effective (so it has a chance to be triggered). else if (select_query_info.local_storage_limits.local_limits.size_limits.max_rows < std::numeric_limits::max()) - table_expression_query_info.limit = 1 + select_query_info.local_storage_limits.local_limits.size_limits.max_rows; + table_expression_query_info.trivial_limit = 1 + select_query_info.local_storage_limits.local_limits.size_limits.max_rows; } else { - table_expression_query_info.limit = max_block_size_limited; + table_expression_query_info.trivial_limit = max_block_size_limited; } } @@ -913,8 +913,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres auto result_ptr = reading->selectRangesToRead(); UInt64 rows_to_read = result_ptr->selected_rows; - if (table_expression_query_info.limit > 0 && table_expression_query_info.limit < rows_to_read) - rows_to_read = table_expression_query_info.limit; + if (table_expression_query_info.trivial_limit > 0 && table_expression_query_info.trivial_limit < rows_to_read) + rows_to_read = table_expression_query_info.trivial_limit; if (max_block_size_limited && (max_block_size_limited < rows_to_read)) rows_to_read = max_block_size_limited; @@ -1132,7 +1132,7 @@ void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextP } } - cast_actions_dag->projectInput(); + cast_actions_dag->appendInputsForUnusedColumns(plan_to_add_cast.getCurrentDataStream().header); auto cast_join_columns_step = std::make_unique(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag)); cast_join_columns_step->setStepDescription("Cast JOIN columns to Nullable"); plan_to_add_cast.addStep(std::move(cast_join_columns_step)); @@ -1178,12 +1178,12 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ join_table_expression, planner_context); - join_clauses_and_actions.left_join_expressions_actions->projectInput(); + join_clauses_and_actions.left_join_expressions_actions->appendInputsForUnusedColumns(left_plan.getCurrentDataStream().header); auto left_join_expressions_actions_step = std::make_unique(left_plan.getCurrentDataStream(), join_clauses_and_actions.left_join_expressions_actions); left_join_expressions_actions_step->setStepDescription("JOIN actions"); left_plan.addStep(std::move(left_join_expressions_actions_step)); - join_clauses_and_actions.right_join_expressions_actions->projectInput(); + join_clauses_and_actions.right_join_expressions_actions->appendInputsForUnusedColumns(right_plan.getCurrentDataStream().header); auto right_join_expressions_actions_step = std::make_unique(right_plan.getCurrentDataStream(), join_clauses_and_actions.right_join_expressions_actions); right_join_expressions_actions_step->setStepDescription("JOIN actions"); right_plan.addStep(std::move(right_join_expressions_actions_step)); @@ -1235,7 +1235,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ output_node = &cast_actions_dag->addCast(*output_node, cast_type, output_node->result_name); } - cast_actions_dag->projectInput(); + cast_actions_dag->appendInputsForUnusedColumns(plan_to_add_cast.getCurrentDataStream().header); auto cast_join_columns_step = std::make_unique(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag)); 
cast_join_columns_step->setStepDescription("Cast JOIN USING columns"); @@ -1630,7 +1630,7 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_ array_join_column_names.insert(array_join_column_identifier); auto & array_join_expression_column = array_join_expression->as(); - auto expression_dag_index_nodes = actions_visitor.visit(array_join_action_dag, array_join_expression_column.getExpressionOrThrow()); + auto expression_dag_index_nodes = actions_visitor.visit(*array_join_action_dag, array_join_expression_column.getExpressionOrThrow()); for (auto & expression_dag_index_node : expression_dag_index_nodes) { @@ -1640,7 +1640,7 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_ } } - array_join_action_dag->projectInput(); + array_join_action_dag->appendInputsForUnusedColumns(plan.getCurrentDataStream().header); join_tree_query_plan.actions_dags.push_back(array_join_action_dag); diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index c410b04f209..67fe45cad7e 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -183,7 +183,7 @@ const ActionsDAG::Node * appendExpression( const JoinNode & join_node) { PlannerActionsVisitor join_expression_visitor(planner_context); - auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(dag, expression); + auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(*dag, expression); if (join_expression_dag_node_raw_pointers.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "JOIN {} ON clause contains multiple expressions", @@ -603,7 +603,7 @@ JoinClausesAndActions buildJoinClausesAndActions( { auto mixed_join_expressions_actions = std::make_shared(mixed_table_expression_columns); PlannerActionsVisitor join_expression_visitor(planner_context); - auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(mixed_join_expressions_actions, join_expression); + auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(*mixed_join_expressions_actions, join_expression); if (join_expression_dag_node_raw_pointers.size() != 1) throw Exception( ErrorCodes::LOGICAL_ERROR, "JOIN {} ON clause contains multiple expressions", join_node.formatASTForErrorMessage()); @@ -802,13 +802,12 @@ static std::shared_ptr tryCreateJoin(JoinAlgorithm algorithm, algorithm == JoinAlgorithm::PARALLEL_HASH || algorithm == JoinAlgorithm::DEFAULT) { - if (table_join->allowParallelHashJoin()) - { - auto query_context = planner_context->getQueryContext(); - return std::make_shared(query_context, table_join, query_context->getSettings().max_threads, right_table_expression_header); - } + auto query_context = planner_context->getQueryContext(); - return std::make_shared(table_join, right_table_expression_header); + if (table_join->allowParallelHashJoin()) + return std::make_shared(query_context, table_join, query_context->getSettings().max_threads, right_table_expression_header); + + return std::make_shared(table_join, right_table_expression_header, query_context->getSettingsRef().join_any_take_last_row); } if (algorithm == JoinAlgorithm::FULL_SORTING_MERGE) diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 4a74bf413d3..18a6d297838 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -213,14 +213,14 @@ StorageLimits buildStorageLimits(const Context & context, const SelectQueryOptio return {limits, leaf_limits}; } -ActionsDAGPtr buildActionsDAGFromExpressionNode(const QueryTreeNodePtr & 
expression_node, +ActionsDAG buildActionsDAGFromExpressionNode(const QueryTreeNodePtr & expression_node, const ColumnsWithTypeAndName & input_columns, const PlannerContextPtr & planner_context) { - ActionsDAGPtr action_dag = std::make_shared(input_columns); + ActionsDAG action_dag(input_columns); PlannerActionsVisitor actions_visitor(planner_context); auto expression_dag_index_nodes = actions_visitor.visit(action_dag, expression_node); - action_dag->getOutputs() = std::move(expression_dag_index_nodes); + action_dag.getOutputs() = std::move(expression_dag_index_nodes); return action_dag; } @@ -443,7 +443,7 @@ FilterDAGInfo buildFilterInfo(QueryTreeNodePtr filter_query_tree, auto filter_actions_dag = std::make_shared(); PlannerActionsVisitor actions_visitor(planner_context, false /*use_column_identifier_as_action_node_name*/); - auto expression_nodes = actions_visitor.visit(filter_actions_dag, filter_query_tree); + auto expression_nodes = actions_visitor.visit(*filter_actions_dag, filter_query_tree); if (expression_nodes.size() != 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filter actions must return single output node. Actual {}", diff --git a/src/Planner/Utils.h b/src/Planner/Utils.h index 4706f552c9d..3172847f053 100644 --- a/src/Planner/Utils.h +++ b/src/Planner/Utils.h @@ -47,7 +47,7 @@ StorageLimits buildStorageLimits(const Context & context, const SelectQueryOptio * Inputs are not used for actions dag outputs. * Only root query tree expression node is used as actions dag output. */ -ActionsDAGPtr buildActionsDAGFromExpressionNode(const QueryTreeNodePtr & expression_node, +ActionsDAG buildActionsDAGFromExpressionNode(const QueryTreeNodePtr & expression_node, const ColumnsWithTypeAndName & input_columns, const PlannerContextPtr & planner_context); diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 8855a1bc28d..be4e9430c34 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -46,6 +46,15 @@ JSONEachRowRowInputFormat::JSONEachRowRowInputFormat( { const auto & header = getPort().getHeader(); name_map = header.getNamesToIndexesMap(); + if (format_settings_.json.ignore_key_case) + { + for (auto & it : name_map) + { + StringRef key = it.first; + String lower_case_key = transformFieldNameToLowerCase(key); + lower_case_name_map[lower_case_key] = key; + } + } if (format_settings_.import_nested_json) { for (size_t i = 0; i != header.columns(); ++i) @@ -171,7 +180,15 @@ void JSONEachRowRowInputFormat::readJSONObject(MutableColumns & columns) skipUnknownField(name_ref); continue; } - const size_t column_index = columnIndex(name_ref, key_index); + size_t column_index = 0; + if (format_settings.json.ignore_key_case) + { + String lower_case_name = transformFieldNameToLowerCase(name_ref); + StringRef field_name_ref = lower_case_name_map[lower_case_name]; + column_index = columnIndex(field_name_ref, key_index); + } + else + column_index = columnIndex(name_ref, key_index); if (unlikely(ssize_t(column_index) < 0)) { diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h index d97aa2dad8d..8a1cef8fa9f 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h @@ -55,7 +55,13 @@ private: virtual void readRowStart(MutableColumns &) {} virtual void skipRowStart() {} - + String 
transformFieldNameToLowerCase(const StringRef & field_name) + { + String field_name_str = field_name.toString(); + std::transform(field_name_str.begin(), field_name_str.end(), field_name_str.begin(), + [](unsigned char c) { return std::tolower(c); }); + return field_name_str; + } /// Buffer for the read from the stream field name. Used when you have to copy it. /// Also, if processing of Nested data is in progress, it holds the common prefix /// of the nested column names (so that appending the field name to it produces @@ -74,7 +80,8 @@ private: /// Hash table match `field name -> position in the block`. NOTE You can use perfect hash map. Block::NameMap name_map; - + /// Hash table match `lower_case field name -> field name in the block`. + std::unordered_map lower_case_name_map; /// Cached search results for previous row (keyed as index in JSON object) - used as a hint. std::vector prev_positions; diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 2662232a048..a5d334f4f1d 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -321,6 +321,9 @@ void ParquetBlockOutputFormat::writeUsingArrow(std::vector chunks) parquet::WriterProperties::Builder builder; builder.version(getParquetVersion(format_settings)); builder.compression(getParquetCompression(format_settings.parquet.output_compression_method)); + // write page index is disable at default. + if (format_settings.parquet.write_page_index) + builder.enable_write_page_index(); parquet::ArrowWriterProperties::Builder writer_props_builder; if (format_settings.parquet.output_compliant_nested_types) diff --git a/src/Processors/IProcessor.cpp b/src/Processors/IProcessor.cpp index 5ab5e5277aa..f403aca2280 100644 --- a/src/Processors/IProcessor.cpp +++ b/src/Processors/IProcessor.cpp @@ -1,21 +1,57 @@ #include #include +#include +#include +#include + namespace DB { +void IProcessor::cancel() +{ + + bool already_cancelled = is_cancelled.exchange(true, std::memory_order_acq_rel); + if (already_cancelled) + return; + + onCancel(); +} + +String IProcessor::debug() const +{ + WriteBufferFromOwnString buf; + writeString(getName(), buf); + buf.write('\n'); + + writeString("inputs (hasData, isFinished):\n", buf); + for (const auto & port : inputs) + { + buf.write('\t'); + writeBoolText(port.hasData(), buf); + buf.write(' '); + writeBoolText(port.isFinished(), buf); + buf.write('\n'); + } + + writeString("outputs (hasData, isNeeded):\n", buf); + for (const auto & port : outputs) + { + buf.write('\t'); + writeBoolText(port.hasData(), buf); + buf.write(' '); + writeBoolText(port.isNeeded(), buf); + buf.write('\n'); + } + + buf.finalize(); + return buf.str(); +} + void IProcessor::dump() const { - std::cerr << getName() << "\n"; - - std::cerr << "inputs:\n"; - for (const auto & port : inputs) - std::cerr << "\t" << port.hasData() << " " << port.isFinished() << "\n"; - - std::cerr << "outputs:\n"; - for (const auto & port : outputs) - std::cerr << "\t" << port.hasData() << " " << port.isNeeded() << "\n"; + std::cerr << debug(); } @@ -39,4 +75,3 @@ std::string IProcessor::statusToName(Status status) } } - diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index 63f32d8deb7..6f779e7a8d4 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -238,12 +238,7 @@ public: /// In case if query was cancelled executor will wait till all processors finish their jobs. 
/// Generally, there is no reason to check this flag. However, it may be reasonable for long operations (e.g. i/o). bool isCancelled() const { return is_cancelled.load(std::memory_order_acquire); } - void cancel() - { - bool already_cancelled = is_cancelled.exchange(true, std::memory_order_acq_rel); - if (!already_cancelled) - onCancel(); - } + void cancel(); /// Additional method which is called in case if ports were updated while work() method. /// May be used to stop execution in rare cases. @@ -286,6 +281,7 @@ public: const auto & getOutputs() const { return outputs; } /// Debug output. + String debug() const; void dump() const; /// Used to print pipeline. diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index b33a373a970..b1ab5561958 100644 --- a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -107,7 +107,7 @@ struct Frame using Stack = std::vector; /// Second pass optimizations -void optimizePrimaryKeyCondition(const Stack & stack); +void optimizePrimaryKeyConditionAndLimit(const Stack & stack); void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes); void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes); void optimizeAggregationInOrder(QueryPlan::Node & node, QueryPlan::Nodes &); diff --git a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp index a5cb5972bd8..6ace1b3b5ce 100644 --- a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp +++ b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp @@ -2,10 +2,25 @@ #include #include #include +#include +#include namespace DB::QueryPlanOptimizations { +static void removeFromOutputs(ActionsDAG & dag, const ActionsDAG::Node & node) +{ + auto & outputs = dag.getOutputs(); + for (size_t i = 0; i < outputs.size(); ++i) + { + if (&node == outputs[i]) + { + outputs.erase(outputs.begin() + i); + return; + } + } +} + size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) { if (parent_node->children.size() != 1) @@ -19,6 +34,7 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) auto * parent_expr = typeid_cast(parent.get()); auto * parent_filter = typeid_cast(parent.get()); auto * child_expr = typeid_cast(child.get()); + auto * child_filter = typeid_cast(child.get()); if (parent_expr && child_expr) { @@ -60,6 +76,42 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) parent_node->children.swap(child_node->children); return 1; } + else if (parent_filter && child_filter) + { + const auto & child_actions = child_filter->getExpression(); + const auto & parent_actions = parent_filter->getExpression(); + + if (child_actions->hasArrayJoin()) + return 0; + + auto actions = child_actions->clone(); + const auto & child_filter_node = actions->findInOutputs(child_filter->getFilterColumnName()); + if (child_filter->removesFilterColumn()) + removeFromOutputs(*actions, child_filter_node); + + actions->mergeInplace(std::move(*parent_actions->clone())); + + const auto & parent_filter_node = actions->findInOutputs(parent_filter->getFilterColumnName()); + if (parent_filter->removesFilterColumn()) + removeFromOutputs(*actions, parent_filter_node); + + FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); + const auto & condition = actions->addFunction(func_builder_and, {&child_filter_node, 
&parent_filter_node}, {}); + auto & outputs = actions->getOutputs(); + outputs.insert(outputs.begin(), &condition); + + actions->removeUnusedActions(false); + + auto filter = std::make_unique(child_filter->getInputStreams().front(), + actions, + condition.result_name, + true); + filter->setStepDescription("(" + parent_filter->getStepDescription() + " + " + child_filter->getStepDescription() + ")"); + + parent_node->step = std::move(filter); + parent_node->children.swap(child_node->children); + return 1; + } return 0; } diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp similarity index 68% rename from src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp rename to src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp index dbcaf5f00a7..da4e104d18b 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp @@ -1,13 +1,13 @@ #include #include #include -#include +#include #include namespace DB::QueryPlanOptimizations { -void optimizePrimaryKeyCondition(const Stack & stack) +void optimizePrimaryKeyConditionAndLimit(const Stack & stack) { const auto & frame = stack.back(); @@ -26,15 +26,25 @@ void optimizePrimaryKeyCondition(const Stack & stack) for (auto iter = stack.rbegin() + 1; iter != stack.rend(); ++iter) { if (auto * filter_step = typeid_cast(iter->node->step.get())) + { source_step_with_filter->addFilter(filter_step->getExpression(), filter_step->getFilterColumnName()); - - /// Note: actually, plan optimizations merge Filter and Expression steps. - /// Ideally, chain should look like (Expression -> ...) -> (Filter -> ...) -> ReadFromStorage, - /// So this is likely not needed. - else if (typeid_cast(iter->node->step.get())) - continue; - else + } + else if (auto * limit_step = typeid_cast(iter->node->step.get())) + { + source_step_with_filter->setLimit(limit_step->getLimitForSorting()); break; + } + else if (typeid_cast(iter->node->step.get())) + { + /// Note: actually, plan optimizations merge Filter and Expression steps. + /// Ideally, chain should look like (Expression -> ...) -> (Filter -> ...) -> ReadFromStorage, + /// So this is likely not needed. + continue; + } + else + { + break; + } } source_step_with_filter->applyFilters(); diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index c175cd516ac..537555afa2a 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -176,8 +176,6 @@ static void appendExpression(ActionsDAGPtr & dag, const ActionsDAGPtr & expressi dag->mergeInplace(std::move(*expression->clone())); else dag = expression->clone(); - - dag->projectInput(false); } /// This function builds a common DAG which is a merge of DAGs from Filter and Expression steps chain. 
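
The optimizer changes above (merging two adjacent FilterSteps into one by AND-ing their filter columns, and letting optimizePrimaryKeyConditionAndLimit push a LIMIT down to the reading step) all aim at doing one combined pass over the data instead of several. Below is a minimal standalone sketch of the filter-merge idea only; it uses plain std::function predicates and a hypothetical mergeFilters helper instead of ClickHouse's ActionsDAG/FilterStep types, so it illustrates the intent rather than the real API.

```cpp
// Standalone sketch (assumed simplified types, not ClickHouse's ActionsDAG API):
// the FilterStep + FilterStep merge evaluates one `child AND parent` predicate
// in a single pass instead of filtering the block twice.
#include <functional>
#include <iostream>
#include <vector>

using Row = int;
using Predicate = std::function<bool(const Row &)>;

// Combine two filter predicates into one, mirroring the `and(child, parent)`
// function node that tryMergeExpressions adds to the merged DAG.
Predicate mergeFilters(Predicate child, Predicate parent)
{
    return [child = std::move(child), parent = std::move(parent)](const Row & row)
    {
        return child(row) && parent(row);
    };
}

int main()
{
    std::vector<Row> rows{1, 2, 3, 4, 5, 6};
    auto merged = mergeFilters([](const Row & r) { return r % 2 == 0; },
                               [](const Row & r) { return r > 2; });

    std::vector<Row> filtered;
    for (const auto & row : rows)
        if (merged(row))          // one pass over the data instead of two
            filtered.push_back(row);

    for (const auto & row : filtered)
        std::cout << row << '\n'; // prints 4 and 6
}
```

In the actual patch the merged step also removes the child/parent filter columns from the outputs when they are marked for removal and prunes unused actions, which this sketch does not model.
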
diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index 136d474751a..25895788e2e 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -115,7 +115,7 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s while (!stack.empty()) { - optimizePrimaryKeyCondition(stack); + optimizePrimaryKeyConditionAndLimit(stack); /// NOTE: optimizePrewhere can modify the stack. /// Prewhere optimization relies on PK optimization (getConditionSelectivityEstimatorByPredicate) diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 713f2f35fc8..70327bc95b4 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -77,7 +77,7 @@ static AggregateProjectionInfo getAggregatingProjectionInfo( AggregateProjectionInfo info; info.context = interpreter.getContext(); - info.before_aggregation = analysis_result.before_aggregation; + info.before_aggregation = analysis_result.before_aggregation->dag.clone(); info.keys = query_analyzer->aggregationKeys().getNames(); info.aggregates = query_analyzer->aggregates(); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 0dacdc0b958..433dd4beee8 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -120,6 +120,7 @@ namespace ProfileEvents extern const Event SelectedParts; extern const Event SelectedRanges; extern const Event SelectedMarks; + extern const Event SelectQueriesWithPrimaryKeyUsage; } namespace DB @@ -249,9 +250,9 @@ void ReadFromMergeTree::AnalysisResult::checkLimits(const Settings & settings, c { /// Fail fast if estimated number of rows to read exceeds the limit size_t total_rows_estimate = selected_rows; - if (query_info_.limit > 0 && total_rows_estimate > query_info_.limit) + if (query_info_.trivial_limit > 0 && total_rows_estimate > query_info_.trivial_limit) { - total_rows_estimate = query_info_.limit; + total_rows_estimate = query_info_.trivial_limit; } limits.check(total_rows_estimate, 0, "rows (controlled by 'max_rows_to_read' setting)", ErrorCodes::TOO_MANY_ROWS); leaf_limits.check( @@ -397,8 +398,8 @@ Pipe ReadFromMergeTree::readFromPool( { size_t total_rows = parts_with_range.getRowsCountAllParts(); - if (query_info.limit > 0 && query_info.limit < total_rows) - total_rows = query_info.limit; + if (query_info.trivial_limit > 0 && query_info.trivial_limit < total_rows) + total_rows = query_info.trivial_limit; const auto & settings = context->getSettingsRef(); @@ -435,7 +436,7 @@ Pipe ReadFromMergeTree::readFromPool( * Because time spend during filling per thread tasks can be greater than whole query * execution for big tables with small limit. 
*/ - bool use_prefetched_read_pool = query_info.limit == 0 && (allow_prefetched_remote || allow_prefetched_local); + bool use_prefetched_read_pool = query_info.trivial_limit == 0 && (allow_prefetched_remote || allow_prefetched_local); if (use_prefetched_read_pool) { @@ -500,11 +501,11 @@ Pipe ReadFromMergeTree::readInOrder( Names required_columns, PoolSettings pool_settings, ReadType read_type, - UInt64 limit) + UInt64 read_limit) { /// For reading in order it makes sense to read only /// one range per task to reduce number of read rows. - bool has_limit_below_one_block = read_type != ReadType::Default && limit && limit < block_size.max_block_size_rows; + bool has_limit_below_one_block = read_type != ReadType::Default && read_limit && read_limit < block_size.max_block_size_rows; MergeTreeReadPoolPtr pool; if (is_parallel_reading_from_replicas) @@ -562,9 +563,8 @@ Pipe ReadFromMergeTree::readInOrder( /// Actually it means that parallel reading from replicas enabled /// and we have to collaborate with initiator. /// In this case we won't set approximate rows, because it will be accounted multiple times. - /// Also do not count amount of read rows if we read in order of sorting key, - /// because we don't know actual amount of read rows in case when limit is set. - bool set_rows_approx = !is_parallel_reading_from_replicas && !reader_settings.read_in_order; + const auto in_order_limit = query_info.input_order_info ? query_info.input_order_info->limit : 0; + const bool set_total_rows_approx = !is_parallel_reading_from_replicas; Pipes pipes; for (size_t i = 0; i < parts_with_ranges.size(); ++i) @@ -572,8 +572,10 @@ Pipe ReadFromMergeTree::readInOrder( const auto & part_with_ranges = parts_with_ranges[i]; UInt64 total_rows = part_with_ranges.getRowsCount(); - if (query_info.limit > 0 && query_info.limit < total_rows) - total_rows = query_info.limit; + if (query_info.trivial_limit > 0 && query_info.trivial_limit < total_rows) + total_rows = query_info.trivial_limit; + else if (in_order_limit > 0 && in_order_limit < total_rows) + total_rows = in_order_limit; LOG_TRACE(log, "Reading {} ranges in{}order from part {}, approx. 
{} rows starting from {}", part_with_ranges.ranges.size(), @@ -594,7 +596,7 @@ Pipe ReadFromMergeTree::readInOrder( processor->addPartLevelToChunk(isQueryWithFinal()); auto source = std::make_shared(std::move(processor)); - if (set_rows_approx) + if (set_total_rows_approx) source->addTotalRowsApprox(total_rows); pipes.emplace_back(std::move(source)); @@ -849,10 +851,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreams(RangesInDataParts && parts_ static ActionsDAGPtr createProjection(const Block & header) { - auto projection = std::make_shared(header.getNamesAndTypesList()); - projection->removeUnusedActions(header.getNames()); - projection->projectInput(); - return projection; + return std::make_shared(header.getNamesAndTypesList()); } Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( @@ -1232,7 +1231,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( bool no_merging_final = do_not_merge_across_partitions_select_final && std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 && parts_to_merge_ranges[range_index]->data_part->info.level > 0 && - data.merging_params.is_deleted_column.empty(); + data.merging_params.is_deleted_column.empty() && !reader_settings.read_in_order; if (no_merging_final) { @@ -1267,7 +1266,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( /// Parts of non-zero level still may contain duplicate PK values to merge on FINAL if there's is_deleted column, /// so we have to process all ranges. It would be more optimal to remove this flag and add an extra filtering step. bool split_parts_ranges_into_intersecting_and_non_intersecting_final = settings.split_parts_ranges_into_intersecting_and_non_intersecting_final && - data.merging_params.is_deleted_column.empty(); + data.merging_params.is_deleted_column.empty() && !reader_settings.read_in_order; SplitPartsWithRangesByPrimaryKeyResult split_ranges_result = splitPartsWithRangesByPrimaryKey( metadata_for_reading->getPrimaryKey(), @@ -1572,11 +1571,17 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( if (indexes->part_values && indexes->part_values->empty()) return std::make_shared(std::move(result)); - if (settings.force_primary_key && indexes->key_condition.alwaysUnknownOrTrue()) + if (indexes->key_condition.alwaysUnknownOrTrue()) { - throw Exception(ErrorCodes::INDEX_NOT_USED, - "Primary key ({}) is not used and setting 'force_primary_key' is set", - fmt::join(primary_key_column_names, ", ")); + if (settings.force_primary_key) + { + throw Exception(ErrorCodes::INDEX_NOT_USED, + "Primary key ({}) is not used and setting 'force_primary_key' is set", + fmt::join(primary_key_column_names, ", ")); + } + } else + { + ProfileEvents::increment(ProfileEvents::SelectQueriesWithPrimaryKeyUsage); } LOG_DEBUG(log, "Key condition: {}", indexes->key_condition.toString()); @@ -1672,7 +1677,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( return std::make_shared(std::move(result)); } -bool ReadFromMergeTree::requestReadingInOrder(size_t prefix_size, int direction, size_t limit) +bool ReadFromMergeTree::requestReadingInOrder(size_t prefix_size, int direction, size_t read_limit) { /// if dirction is not set, use current one if (!direction) @@ -1683,7 +1688,7 @@ bool ReadFromMergeTree::requestReadingInOrder(size_t prefix_size, int direction, if (direction != 1 && query_info.isFinal()) return false; - query_info.input_order_info = std::make_shared(SortDescription{}, prefix_size, direction, limit); + 
query_info.input_order_info = std::make_shared(SortDescription{}, prefix_size, direction, read_limit); reader_settings.read_in_order = true; /// In case or read-in-order, don't create too many reading streams. diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 11371578c79..eb974259c5e 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -393,7 +393,7 @@ ReadFromSystemNumbersStep::ReadFromSystemNumbersStep( , num_streams{num_streams_} , limit_length_and_offset(InterpreterSelectQuery::getLimitLengthAndOffset(query_info.query->as(), context)) , should_pushdown_limit(shouldPushdownLimit(query_info, limit_length_and_offset.first)) - , limit(query_info.limit) + , query_info_limit(query_info.trivial_limit) , storage_limits(query_info.storage_limits) { storage_snapshot->check(column_names); @@ -563,7 +563,7 @@ Pipe ReadFromSystemNumbersStep::makePipe() { auto rows_appr = (*numbers_storage.limit - 1) / numbers_storage.step + 1; if (limit > 0 && limit < rows_appr) - rows_appr = limit; + rows_appr = query_info_limit; source->addTotalRowsApprox(rows_appr); } diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.h b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.h index bc84e31be62..e33d67d7150 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.h +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.h @@ -41,7 +41,7 @@ private: size_t num_streams; std::pair limit_length_and_offset; bool should_pushdown_limit; - UInt64 limit; + UInt64 query_info_limit; std::shared_ptr storage_limits; }; diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.h b/src/Processors/QueryPlan/SourceStepWithFilter.h index 126d4824fff..ca4ea4f3704 100644 --- a/src/Processors/QueryPlan/SourceStepWithFilter.h +++ b/src/Processors/QueryPlan/SourceStepWithFilter.h @@ -8,8 +8,9 @@ namespace DB { -/** Source step that can use filters for more efficient pipeline initialization. +/** Source step that can use filters and limit for more efficient pipeline initialization. * Filters must be added before pipeline initialization. + * Limit must be set before pipeline initialization. */ class SourceStepWithFilter : public ISourceStep { @@ -49,6 +50,11 @@ public: filter_dags.push_back(std::move(filter_dag)); } + void setLimit(size_t limit_value) + { + limit = limit_value; + } + /// Apply filters that can optimize reading from storage. 
void applyFilters() { @@ -72,6 +78,7 @@ protected: PrewhereInfoPtr prewhere_info; StorageSnapshotPtr storage_snapshot; ContextPtr context; + std::optional limit; ActionsDAGPtr filter_actions_dag; diff --git a/src/Processors/Sources/PostgreSQLSource.cpp b/src/Processors/Sources/PostgreSQLSource.cpp index 4b828d6699c..f18c63ed385 100644 --- a/src/Processors/Sources/PostgreSQLSource.cpp +++ b/src/Processors/Sources/PostgreSQLSource.cpp @@ -120,7 +120,7 @@ Chunk PostgreSQLSource::generate() MutableColumns columns = description.sample_block.cloneEmptyColumns(); size_t num_rows = 0; - while (true) + while (!isCancelled()) { const std::vector * row{stream->read_row()}; diff --git a/src/Processors/Sources/RecursiveCTESource.cpp b/src/Processors/Sources/RecursiveCTESource.cpp index 93503b45aaf..221198c622a 100644 --- a/src/Processors/Sources/RecursiveCTESource.cpp +++ b/src/Processors/Sources/RecursiveCTESource.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Processors/Transforms/AddingDefaultsTransform.cpp b/src/Processors/Transforms/AddingDefaultsTransform.cpp index e6c2bcec2c8..7945b3999c1 100644 --- a/src/Processors/Transforms/AddingDefaultsTransform.cpp +++ b/src/Processors/Transforms/AddingDefaultsTransform.cpp @@ -178,7 +178,7 @@ void AddingDefaultsTransform::transform(Chunk & chunk) auto dag = evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), columns, context, false); if (dag) { - auto actions = std::make_shared(std::move(dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); + auto actions = std::make_shared(std::move(dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes), true); actions->execute(evaluate_block); } diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index b48d435720a..65f0612d738 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include namespace ProfileEvents @@ -783,7 +783,7 @@ void AggregatingTransform::initGenerate() { /// Just a reasonable constant, matches default value for the setting `preferred_block_size_bytes` static constexpr size_t oneMB = 1024 * 1024; - return std::make_shared(header, params->params.max_block_size, oneMB); + return std::make_shared(header, params->params.max_block_size, oneMB); }); } /// AggregatingTransform::expandPipeline expects single output port. 
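
The cancellation-related changes above (IProcessor::cancel moved out of line, and PostgreSQLSource::generate looping on `while (!isCancelled())`) follow a cooperative-cancellation pattern: an atomic exchange guarantees the onCancel hook fires exactly once, while long-running work loops poll the flag at safe points. The sketch below is a standalone illustration; the `Processor` class, its `run` loop, and the console output are assumptions for demonstration, only the exchange-based cancel body mirrors the diff.

```cpp
// Standalone sketch (simplified stand-in for IProcessor): cancel() uses an
// atomic exchange so the onCancel() hook runs exactly once, and work loops
// poll isCancelled() as their cooperative cancellation point.
#include <atomic>
#include <iostream>

class Processor
{
public:
    bool isCancelled() const { return is_cancelled.load(std::memory_order_acquire); }

    void cancel()
    {
        // exchange() returns the previous value, so only the first caller
        // observes `false` and triggers the cancellation hook.
        bool already_cancelled = is_cancelled.exchange(true, std::memory_order_acq_rel);
        if (already_cancelled)
            return;
        onCancel();
    }

    void run()
    {
        int processed = 0;
        while (!isCancelled())   // cooperative cancellation point
        {
            ++processed;
            if (processed == 3)
                cancel();        // pretend an external event cancelled us
        }
        std::cout << "processed " << processed << " chunks\n";
    }

private:
    virtual void onCancel() { std::cout << "onCancel called once\n"; }

    std::atomic<bool> is_cancelled{false};
};

int main()
{
    Processor p;
    p.run();
    p.cancel(); // repeated cancel is a no-op: onCancel does not fire again
}
```

Making cancel() non-inline (as the patch does) also lets the hook grow without touching every translation unit that includes IProcessor.h.
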
diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h new file mode 100644 index 00000000000..965a084bb13 --- /dev/null +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -0,0 +1,63 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class ApplySquashingTransform : public ExceptionKeepingTransform +{ +public: + explicit ApplySquashingTransform(const Block & header, const size_t min_block_size_rows, const size_t min_block_size_bytes) + : ExceptionKeepingTransform(header, header, false) + , squashing(header, min_block_size_rows, min_block_size_bytes) + { + } + + String getName() const override { return "ApplySquashingTransform"; } + + void work() override + { + if (stage == Stage::Exception) + { + data.chunk.clear(); + ready_input = false; + return; + } + + ExceptionKeepingTransform::work(); + if (finish_chunk) + { + data.chunk = std::move(finish_chunk); + ready_output = true; + } + } + +protected: + void onConsume(Chunk chunk) override + { + if (auto res_chunk = DB::Squashing::squash(std::move(chunk))) + cur_chunk.setColumns(res_chunk.getColumns(), res_chunk.getNumRows()); + } + + GenerateResult onGenerate() override + { + GenerateResult res; + res.chunk = std::move(cur_chunk); + res.is_done = true; + return res; + } + void onFinish() override + { + auto chunk = DB::Squashing::squash({}); + finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); + } + +private: + Squashing squashing; + Chunk cur_chunk; + Chunk finish_chunk; +}; + +} diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index 159a3244fe9..fb3b2faa9c5 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -9,7 +9,6 @@ #include #include -#include #include #include #include @@ -19,6 +18,7 @@ #include #include #include +#include #include @@ -40,7 +40,7 @@ FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns) desc.reserve(columns.size()); for (const auto & name : columns) desc.emplace_back(name); - return std::make_unique(materializeBlock(block), desc); + return std::make_unique(block, desc); } template @@ -234,9 +234,14 @@ void inline addMany(PaddedPODArray & left_or_right_map, size_t idx, size for (size_t i = 0; i < num; ++i) left_or_right_map.push_back(idx); } - } +FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_) + : sample_block(materializeBlock(sample_block_).cloneEmpty()), desc(description_) +{ +} + + const Chunk & FullMergeJoinCursor::getCurrent() const { return current_chunk; @@ -260,6 +265,10 @@ void FullMergeJoinCursor::setChunk(Chunk && chunk) return; } + // should match the structure of sample_block (after materialization) + convertToFullIfConst(chunk); + convertToFullIfSparse(chunk); + current_chunk = std::move(chunk); cursor = SortCursorImpl(sample_block, current_chunk.getColumns(), desc); } diff --git a/src/Processors/Transforms/MergeJoinTransform.h b/src/Processors/Transforms/MergeJoinTransform.h index cf9331abd59..5ca6b076544 100644 --- a/src/Processors/Transforms/MergeJoinTransform.h +++ b/src/Processors/Transforms/MergeJoinTransform.h @@ -193,11 +193,7 @@ private: class FullMergeJoinCursor : boost::noncopyable { public: - explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_) - : sample_block(sample_block_.cloneEmpty()) - , desc(description_) - { - } + explicit 
FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_); bool fullyCompleted() const; void setChunk(Chunk && chunk); diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp new file mode 100644 index 00000000000..0f433165f14 --- /dev/null +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -0,0 +1,44 @@ +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +PlanSquashingTransform::PlanSquashingTransform( + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) + : IInflatingTransform(header, header), squashing(header, min_block_size_rows, min_block_size_bytes) +{ +} + +void PlanSquashingTransform::consume(Chunk chunk) +{ + if (Chunk current_chunk = squashing.add(std::move(chunk)); current_chunk.hasChunkInfo()) + squashed_chunk.swap(current_chunk); +} + +Chunk PlanSquashingTransform::generate() +{ + if (!squashed_chunk.hasChunkInfo()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); + + Chunk result_chunk; + result_chunk.swap(squashed_chunk); + return result_chunk; +} + +bool PlanSquashingTransform::canGenerate() +{ + return squashed_chunk.hasChunkInfo(); +} + +Chunk PlanSquashingTransform::getRemaining() +{ + Chunk current_chunk = squashing.flush(); + return current_chunk; +} +} diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h new file mode 100644 index 00000000000..4ad2ec2d089 --- /dev/null +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class PlanSquashingTransform : public IInflatingTransform +{ +public: + PlanSquashingTransform( + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); + + String getName() const override { return "PlanSquashingTransform"; } + +protected: + void consume(Chunk chunk) override; + bool canGenerate() override; + Chunk generate() override; + Chunk getRemaining() override; + +private: + Squashing squashing; + Chunk squashed_chunk; + Chunk finish_chunk; +}; +} + diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp deleted file mode 100644 index ed67dd508f3..00000000000 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ /dev/null @@ -1,94 +0,0 @@ -#include - -namespace DB -{ - -namespace ErrorCodes -{ -extern const int LOGICAL_ERROR; -} - -SquashingChunksTransform::SquashingChunksTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : ExceptionKeepingTransform(header, header, false) - , squashing(min_block_size_rows, min_block_size_bytes) -{ -} - -void SquashingChunksTransform::onConsume(Chunk chunk) -{ - if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) - { - cur_chunk.setColumns(block.getColumns(), block.rows()); - } -} - -SquashingChunksTransform::GenerateResult SquashingChunksTransform::onGenerate() -{ - GenerateResult res; - res.chunk = std::move(cur_chunk); - res.is_done = true; - return res; -} - -void SquashingChunksTransform::onFinish() -{ - auto block = squashing.add({}); - finish_chunk.setColumns(block.getColumns(), block.rows()); -} - -void SquashingChunksTransform::work() -{ - if (stage == Stage::Exception) - { - data.chunk.clear(); - ready_input = false; - return; - } - 
- ExceptionKeepingTransform::work(); - if (finish_chunk) - { - data.chunk = std::move(finish_chunk); - ready_output = true; - } -} - -SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : IInflatingTransform(header, header), squashing(min_block_size_rows, min_block_size_bytes) -{ -} - -void SimpleSquashingChunksTransform::consume(Chunk chunk) -{ - Block current_block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); - squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); -} - -Chunk SimpleSquashingChunksTransform::generate() -{ - if (squashed_chunk.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); - - Chunk result_chunk; - result_chunk.swap(squashed_chunk); - return result_chunk; -} - -bool SimpleSquashingChunksTransform::canGenerate() -{ - return !squashed_chunk.empty(); -} - -Chunk SimpleSquashingChunksTransform::getRemaining() -{ - Block current_block = squashing.add({}); - squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); - - Chunk result_chunk; - result_chunk.swap(squashed_chunk); - return result_chunk; -} - -} diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp new file mode 100644 index 00000000000..34b733cde5e --- /dev/null +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -0,0 +1,108 @@ +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + +SquashingTransform::SquashingTransform( + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) + : ExceptionKeepingTransform(header, header, false) + , squashing(header, min_block_size_rows, min_block_size_bytes) +{ +} + +void SquashingTransform::onConsume(Chunk chunk) +{ + Chunk planned_chunk = squashing.add(std::move(chunk)); + if (planned_chunk.hasChunkInfo()) + cur_chunk = DB::Squashing::squash(std::move(planned_chunk)); +} + +SquashingTransform::GenerateResult SquashingTransform::onGenerate() +{ + GenerateResult res; + res.chunk = std::move(cur_chunk); + res.is_done = true; + return res; +} + +void SquashingTransform::onFinish() +{ + Chunk chunk = squashing.flush(); + if (chunk.hasChunkInfo()) + chunk = DB::Squashing::squash(std::move(chunk)); + finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); +} + +void SquashingTransform::work() +{ + if (stage == Stage::Exception) + { + data.chunk.clear(); + ready_input = false; + return; + } + + ExceptionKeepingTransform::work(); + if (finish_chunk) + { + data.chunk = std::move(finish_chunk); + ready_output = true; + } +} + +SimpleSquashingTransform::SimpleSquashingTransform( + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) + : ISimpleTransform(header, header, false) + , squashing(header, min_block_size_rows, min_block_size_bytes) +{ +} + +void SimpleSquashingTransform::transform(Chunk & chunk) +{ + if (!finished) + { + Chunk planned_chunk = squashing.add(std::move(chunk)); + if (planned_chunk.hasChunkInfo()) + chunk = DB::Squashing::squash(std::move(planned_chunk)); + } + else + { + if (chunk.hasRows()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); + + chunk = squashing.flush(); + if (chunk.hasChunkInfo()) + chunk = DB::Squashing::squash(std::move(chunk)); + } +} + +IProcessor::Status 
SimpleSquashingTransform::prepare() +{ + if (!finished && input.isFinished()) + { + if (output.isFinished()) + return Status::Finished; + + if (!output.canPush()) + return Status::PortFull; + + if (has_output) + { + output.pushData(std::move(output_data)); + has_output = false; + return Status::PortFull; + } + + finished = true; + /// On the next call to transform() we will return all data buffered in `squashing` (if any) + return Status::Ready; + } + return ISimpleTransform::prepare(); +} +} diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingTransform.h similarity index 54% rename from src/Processors/Transforms/SquashingChunksTransform.h rename to src/Processors/Transforms/SquashingTransform.h index 8c30a6032e4..c5b727ac6ec 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingTransform.h @@ -1,17 +1,17 @@ #pragma once -#include +#include #include -#include #include +#include namespace DB { -class SquashingChunksTransform : public ExceptionKeepingTransform +class SquashingTransform : public ExceptionKeepingTransform { public: - explicit SquashingChunksTransform( + explicit SquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); String getName() const override { return "SquashingTransform"; } @@ -24,28 +24,27 @@ protected: void onFinish() override; private: - SquashingTransform squashing; + Squashing squashing; Chunk cur_chunk; Chunk finish_chunk; }; /// Doesn't care about propagating exceptions and thus doesn't throw LOGICAL_ERROR if the following transform closes its input port. -class SimpleSquashingChunksTransform : public IInflatingTransform +class SimpleSquashingTransform : public ISimpleTransform { public: - explicit SimpleSquashingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); + explicit SimpleSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); String getName() const override { return "SimpleSquashingTransform"; } protected: - void consume(Chunk chunk) override; - bool canGenerate() override; - Chunk generate() override; - Chunk getRemaining() override; + void transform(Chunk &) override; + + IProcessor::Status prepare() override; private: - SquashingTransform squashing; - Chunk squashed_chunk; -}; + Squashing squashing; + bool finished = false; +}; } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index a1a886fb4f7..25fbf13b0e7 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -6,7 +6,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -371,7 +372,7 @@ std::optional generateViewChain( bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); const auto & settings = insert_context->getSettingsRef(); - out.addSource(std::make_shared( + out.addSource(std::make_shared( out.getInputHeader(), table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); @@ -622,7 +623,7 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat /// Squashing is needed here because the materialized view query can generate a lot of blocks /// even when only one block is inserted into the parent table (e.g. 
if the query is a GROUP BY /// and two-level aggregation is triggered). - pipeline.addTransform(std::make_shared( + pipeline.addTransform(std::make_shared( pipeline.getHeader(), context->getSettingsRef().min_insert_block_size_rows, context->getSettingsRef().min_insert_block_size_bytes)); diff --git a/src/QueryPipeline/SizeLimits.cpp b/src/QueryPipeline/SizeLimits.cpp index 76832b1f951..4161f3f365f 100644 --- a/src/QueryPipeline/SizeLimits.cpp +++ b/src/QueryPipeline/SizeLimits.cpp @@ -2,7 +2,6 @@ #include #include #include -#include namespace ProfileEvents diff --git a/src/Server/CertificateReloader.cpp b/src/Server/CertificateReloader.cpp index 98d7a362bd7..df7b6e7fbd7 100644 --- a/src/Server/CertificateReloader.cpp +++ b/src/Server/CertificateReloader.cpp @@ -15,18 +15,23 @@ namespace DB namespace { + /// Call set process for certificate. -int callSetCertificate(SSL * ssl, [[maybe_unused]] void * arg) +int callSetCertificate(SSL * ssl, void * arg) { - return CertificateReloader::instance().setCertificate(ssl); + if (!arg) + return -1; + + const CertificateReloader::MultiData * pdata = reinterpret_cast(arg); + return CertificateReloader::instance().setCertificate(ssl, pdata); } } /// This is callback for OpenSSL. It will be called on every connection to obtain a certificate and private key. -int CertificateReloader::setCertificate(SSL * ssl) +int CertificateReloader::setCertificate(SSL * ssl, const CertificateReloader::MultiData * pdata) { - auto current = data.get(); + auto current = pdata->data.get(); if (!current) return -1; @@ -65,24 +70,54 @@ int CertificateReloader::setCertificate(SSL * ssl) } -void CertificateReloader::init() +void CertificateReloader::init(MultiData * pdata) { LOG_DEBUG(log, "Initializing certificate reloader."); /// Set a callback for OpenSSL to allow get the updated cert and key. 
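The CertificateReloader change just below replaces the single global callback with one registered per SSL_CTX, passing a per-context `MultiData` pointer as the callback argument. A minimal sketch of that OpenSSL pattern outside of ClickHouse, assuming OpenSSL ≥ 1.1.0; `CertHolder`, `set_certificate_cb` and `register_cert_callback` are illustrative names, not part of this PR:

```cpp
#include <openssl/ssl.h>

// Illustrative per-context payload; the PR's MultiData plays this role.
struct CertHolder
{
    const char * cert_path;
    const char * key_path;
};

// OpenSSL invokes this on every handshake; `arg` is whatever was passed
// to SSL_CTX_set_cert_cb for this particular SSL_CTX.
static int set_certificate_cb(SSL * ssl, void * arg)
{
    auto * holder = static_cast<CertHolder *>(arg);
    if (!holder)
        return 0;   // fatal error, abort the handshake

    if (SSL_use_certificate_chain_file(ssl, holder->cert_path) != 1)
        return 0;
    if (SSL_use_PrivateKey_file(ssl, holder->key_path, SSL_FILETYPE_PEM) != 1)
        return 0;

    return 1;       // certificate installed, continue the handshake
}

// Each SSL_CTX gets its own holder, so several listeners can serve
// different certificates and reload them independently.
void register_cert_callback(SSL_CTX * ctx, CertHolder * holder)
{
    SSL_CTX_set_cert_cb(ctx, set_certificate_cb, holder);
}
```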
- auto* ctx = Poco::Net::SSLManager::instance().defaultServerContext()->sslContext(); - SSL_CTX_set_cert_cb(ctx, callSetCertificate, nullptr); - init_was_not_made = false; + SSL_CTX_set_cert_cb(pdata->ctx, callSetCertificate, reinterpret_cast(pdata)); + pdata->init_was_not_made = false; } void CertificateReloader::tryLoad(const Poco::Util::AbstractConfiguration & config) +{ + tryLoad(config, nullptr, Poco::Net::SSLManager::CFG_SERVER_PREFIX); +} + + +void CertificateReloader::tryLoad(const Poco::Util::AbstractConfiguration & config, SSL_CTX * ctx, const std::string & prefix) +{ + std::lock_guard lock{data_mutex}; + tryLoadImpl(config, ctx, prefix); +} + + +std::list::iterator CertificateReloader::findOrInsert(SSL_CTX * ctx, const std::string & prefix) +{ + auto it = data.end(); + auto i = data_index.find(prefix); + if (i != data_index.end()) + it = i->second; + else + { + if (!ctx) + ctx = Poco::Net::SSLManager::instance().defaultServerContext()->sslContext(); + data.push_back(MultiData(ctx)); + --it; + data_index[prefix] = it; + } + return it; +} + + +void CertificateReloader::tryLoadImpl(const Poco::Util::AbstractConfiguration & config, SSL_CTX * ctx, const std::string & prefix) { /// If at least one of the files is modified - recreate - std::string new_cert_path = config.getString("openSSL.server.certificateFile", ""); - std::string new_key_path = config.getString("openSSL.server.privateKeyFile", ""); + std::string new_cert_path = config.getString(prefix + "certificateFile", ""); + std::string new_key_path = config.getString(prefix + "privateKeyFile", ""); /// For empty paths (that means, that user doesn't want to use certificates) /// no processing required @@ -93,32 +128,41 @@ void CertificateReloader::tryLoad(const Poco::Util::AbstractConfiguration & conf } else { - bool cert_file_changed = cert_file.changeIfModified(std::move(new_cert_path), log); - bool key_file_changed = key_file.changeIfModified(std::move(new_key_path), log); - std::string pass_phrase = config.getString("openSSL.server.privateKeyPassphraseHandler.options.password", ""); - - if (cert_file_changed || key_file_changed) - { - LOG_DEBUG(log, "Reloading certificate ({}) and key ({}).", cert_file.path, key_file.path); - data.set(std::make_unique(cert_file.path, key_file.path, pass_phrase)); - LOG_INFO(log, "Reloaded certificate ({}) and key ({}).", cert_file.path, key_file.path); - } - - /// If callback is not set yet try { - if (init_was_not_made) - init(); + auto it = findOrInsert(ctx, prefix); + + bool cert_file_changed = it->cert_file.changeIfModified(std::move(new_cert_path), log); + bool key_file_changed = it->key_file.changeIfModified(std::move(new_key_path), log); + + if (cert_file_changed || key_file_changed) + { + LOG_DEBUG(log, "Reloading certificate ({}) and key ({}).", it->cert_file.path, it->key_file.path); + std::string pass_phrase = config.getString(prefix + "privateKeyPassphraseHandler.options.password", ""); + it->data.set(std::make_unique(it->cert_file.path, it->key_file.path, pass_phrase)); + LOG_INFO(log, "Reloaded certificate ({}) and key ({}).", it->cert_file.path, it->key_file.path); + } + + /// If callback is not set yet + if (it->init_was_not_made) + init(&*it); } catch (...) 
{ - init_was_not_made = true; LOG_ERROR(log, getCurrentExceptionMessageAndPattern(/* with_stacktrace */ false)); } } } +void CertificateReloader::tryReloadAll(const Poco::Util::AbstractConfiguration & config) +{ + std::lock_guard lock{data_mutex}; + for (auto & item : data_index) + tryLoadImpl(config, item.second->ctx, item.first); +} + + CertificateReloader::Data::Data(std::string cert_path, std::string key_path, std::string pass_phrase) : certs_chain(Poco::Crypto::X509Certificate::readPEM(cert_path)), key(/* public key */ "", /* private key */ key_path, pass_phrase) { diff --git a/src/Server/CertificateReloader.h b/src/Server/CertificateReloader.h index 5ab799037d5..7472d2f6baa 100644 --- a/src/Server/CertificateReloader.h +++ b/src/Server/CertificateReloader.h @@ -6,6 +6,9 @@ #include #include +#include +#include +#include #include #include @@ -31,28 +34,13 @@ class CertificateReloader public: using stat_t = struct stat; - /// Singleton - CertificateReloader(CertificateReloader const &) = delete; - void operator=(CertificateReloader const &) = delete; - static CertificateReloader & instance() + struct Data { - static CertificateReloader instance; - return instance; - } + Poco::Crypto::X509Certificate::List certs_chain; + Poco::Crypto::EVPPKey key; - /// Initialize the callback and perform the initial cert loading - void init(); - - /// Handle configuration reload - void tryLoad(const Poco::Util::AbstractConfiguration & config); - - /// A callback for OpenSSL - int setCertificate(SSL * ssl); - -private: - CertificateReloader() = default; - - LoggerPtr log = getLogger("CertificateReloader"); + Data(std::string cert_path, std::string key_path, std::string pass_phrase); + }; struct File { @@ -65,19 +53,55 @@ private: bool changeIfModified(std::string new_path, LoggerPtr logger); }; - File cert_file{"certificate"}; - File key_file{"key"}; - - struct Data + struct MultiData { - Poco::Crypto::X509Certificate::List certs_chain; - Poco::Crypto::EVPPKey key; + SSL_CTX * ctx = nullptr; + MultiVersion data; + bool init_was_not_made = true; - Data(std::string cert_path, std::string key_path, std::string pass_phrase); + File cert_file{"certificate"}; + File key_file{"key"}; + + explicit MultiData(SSL_CTX * ctx_) : ctx(ctx_) {} }; - MultiVersion data; - bool init_was_not_made = true; + /// Singleton + CertificateReloader(CertificateReloader const &) = delete; + void operator=(CertificateReloader const &) = delete; + static CertificateReloader & instance() + { + static CertificateReloader instance; + return instance; + } + + /// Handle configuration reload for default path + void tryLoad(const Poco::Util::AbstractConfiguration & config); + + /// Handle configuration reload + void tryLoad(const Poco::Util::AbstractConfiguration & config, SSL_CTX * ctx, const std::string & prefix); + + /// Handle configuration reload for all contexts + void tryReloadAll(const Poco::Util::AbstractConfiguration & config); + + /// A callback for OpenSSL + int setCertificate(SSL * ssl, const MultiData * pdata); + +private: + CertificateReloader() = default; + + /// Initialize the callback and perform the initial cert loading + void init(MultiData * pdata) TSA_REQUIRES(data_mutex); + + /// Unsafe implementation + void tryLoadImpl(const Poco::Util::AbstractConfiguration & config, SSL_CTX * ctx, const std::string & prefix) TSA_REQUIRES(data_mutex); + + std::list::iterator findOrInsert(SSL_CTX * ctx, const std::string & prefix) TSA_REQUIRES(data_mutex); + + LoggerPtr log = getLogger("CertificateReloader"); + + std::list data 
TSA_GUARDED_BY(data_mutex); + std::unordered_map::iterator> data_index TSA_GUARDED_BY(data_mutex); + mutable std::mutex data_mutex; }; } diff --git a/src/Server/CloudPlacementInfo.cpp b/src/Server/CloudPlacementInfo.cpp index 0790f825a45..d8810bb30de 100644 --- a/src/Server/CloudPlacementInfo.cpp +++ b/src/Server/CloudPlacementInfo.cpp @@ -11,6 +11,11 @@ namespace DB { +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + namespace PlacementInfo { @@ -46,7 +51,15 @@ PlacementInfo & PlacementInfo::instance() } void PlacementInfo::initialize(const Poco::Util::AbstractConfiguration & config) +try { + if (!config.has(DB::PlacementInfo::PLACEMENT_CONFIG_PREFIX)) + { + availability_zone = ""; + initialized = true; + return; + } + use_imds = config.getBool(getConfigPath("use_imds"), false); if (use_imds) @@ -67,14 +80,17 @@ void PlacementInfo::initialize(const Poco::Util::AbstractConfiguration & config) LOG_DEBUG(log, "Loaded info: availability_zone: {}", availability_zone); initialized = true; } +catch (...) +{ + tryLogCurrentException("Failed to get availability zone"); + availability_zone = ""; + initialized = true; +} std::string PlacementInfo::getAvailabilityZone() const { if (!initialized) - { - LOG_WARNING(log, "Placement info has not been loaded"); - return ""; - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Placement info has not been loaded"); return availability_zone; } diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp index 8098671a903..e2098b284bf 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp @@ -162,7 +162,8 @@ WriteBufferFromHTTPServerResponse::~WriteBufferFromHTTPServerResponse() { try { - finalize(); + if (!canceled) + finalize(); } catch (...) { diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index f8c37ebf794..3241e22fa35 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -1027,14 +1027,7 @@ catch (...) { tryLogCurrentException(log, "Cannot send exception to client"); - try - { - used_output.finalize(); - } - catch (...) - { - tryLogCurrentException(log, "Cannot flush data to client (after sending exception)"); - } + used_output.cancel(); } void HTTPHandler::formatExceptionForClient(int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output) @@ -1046,12 +1039,21 @@ void HTTPHandler::formatExceptionForClient(int exception_code, HTTPServerRequest /// FIXME: make sure that no one else is reading from the same stream at the moment. - /// If HTTP method is POST and Keep-Alive is turned on, we should read the whole request body + /// If HTTP method is POST and Keep-Alive is turned on, we should try to read the whole request body /// to avoid reading part of the current request body in the next request. if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST && response.getKeepAlive() - && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED && !request.getStream().eof()) + && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED) { - request.getStream().ignoreAll(); + try + { + if (!request.getStream().eof()) + request.getStream().ignoreAll(); + } + catch (...) 
+ { + tryLogCurrentException(log, "Cannot read remaining request body during exception handling"); + response.setKeepAlive(false); + } } if (exception_code == ErrorCodes::REQUIRED_PASSWORD) @@ -1163,7 +1165,7 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse /// Check if exception was thrown in used_output.finalize(). /// In this case used_output can be in invalid state and we /// cannot write in it anymore. So, just log this exception. - if (used_output.isFinalized()) + if (used_output.isFinalized() || used_output.isCanceled()) { if (thread_trace_context) thread_trace_context->root_span.addAttribute("clickhouse.exception", "Cannot flush data to client"); @@ -1182,6 +1184,8 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse if (thread_trace_context) thread_trace_context->root_span.addAttribute(status); + + return; } used_output.finalize(); diff --git a/src/Server/HTTPHandler.h b/src/Server/HTTPHandler.h index c5551102f7a..c78c45826f0 100644 --- a/src/Server/HTTPHandler.h +++ b/src/Server/HTTPHandler.h @@ -78,6 +78,7 @@ private: WriteBuffer * out_maybe_delayed_and_compressed = nullptr; bool finalized = false; + bool canceled = false; bool exception_is_written = false; std::function exception_writer; @@ -99,6 +100,24 @@ private: out->finalize(); } + void cancel() + { + if (canceled) + return; + canceled = true; + + if (out_compressed_holder) + out_compressed_holder->cancel(); + if (out) + out->cancel(); + } + + + bool isCanceled() const + { + return canceled; + } + bool isFinalized() const { return finalized; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index b60339e9fd8..b59fe2c1849 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1,9 +1,8 @@ -#include "Interpreters/AsynchronousInsertQueue.h" -#include "Interpreters/SquashingTransform.h" -#include "Parsers/ASTInsertQuery.h" +#include +#include +#include #include #include -#include #include #include #include @@ -388,7 +387,7 @@ void TCPHandler::runImpl() query_scope.emplace(query_context, /* fatal_error_callback */ [this] { - std::lock_guard lock(fatal_error_mutex); + std::lock_guard lock(out_mutex); sendLogs(); }); @@ -476,7 +475,7 @@ void TCPHandler::runImpl() Stopwatch watch; CurrentMetrics::Increment callback_metric_increment(CurrentMetrics::ReadTaskRequestsSent); - std::lock_guard lock(task_callback_mutex); + std::scoped_lock lock(out_mutex, task_callback_mutex); if (state.cancellation_status == CancellationStatus::FULLY_CANCELLED) return {}; @@ -492,7 +491,7 @@ void TCPHandler::runImpl() { Stopwatch watch; CurrentMetrics::Increment callback_metric_increment(CurrentMetrics::MergeTreeAllRangesAnnouncementsSent); - std::lock_guard lock(task_callback_mutex); + std::scoped_lock lock(out_mutex, task_callback_mutex); if (state.cancellation_status == CancellationStatus::FULLY_CANCELLED) return; @@ -506,7 +505,7 @@ void TCPHandler::runImpl() { Stopwatch watch; CurrentMetrics::Increment callback_metric_increment(CurrentMetrics::MergeTreeReadTaskRequestsSent); - std::lock_guard lock(task_callback_mutex); + std::scoped_lock lock(out_mutex, task_callback_mutex); if (state.cancellation_status == CancellationStatus::FULLY_CANCELLED) return std::nullopt; @@ -554,7 +553,7 @@ void TCPHandler::runImpl() { auto callback = [this]() { - std::scoped_lock lock(task_callback_mutex, fatal_error_mutex); + std::scoped_lock lock(out_mutex, task_callback_mutex); if (getQueryCancellationStatus() == CancellationStatus::FULLY_CANCELLED) return true; @@ 
-573,7 +572,7 @@ void TCPHandler::runImpl() finish_or_cancel(); - std::lock_guard lock(task_callback_mutex); + std::lock_guard lock(out_mutex); /// Send final progress after calling onFinish(), since it will update the progress. /// @@ -596,7 +595,7 @@ void TCPHandler::runImpl() break; { - std::lock_guard lock(task_callback_mutex); + std::lock_guard lock(out_mutex); sendLogs(); sendEndOfStream(); } @@ -885,13 +884,16 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro using PushResult = AsynchronousInsertQueue::PushResult; startInsertQuery(); - SquashingTransform squashing(0, query_context->getSettingsRef().async_insert_max_data_size); + Squashing squashing(state.input_header, 0, query_context->getSettingsRef().async_insert_max_data_size); while (readDataNext()) { - auto result = squashing.add(std::move(state.block_for_insert)); - if (result) + squashing.header = state.block_for_insert; + auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); + if (planned_chunk.hasChunkInfo()) { + Chunk result_chunk = DB::Squashing::squash(std::move(planned_chunk)); + auto result = state.block_for_insert.cloneWithColumns(result_chunk.getColumns()); return PushResult { .status = PushResult::TOO_MUCH_DATA, @@ -900,7 +902,12 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro } } - auto result = squashing.add({}); + auto planned_chunk = squashing.flush(); + Chunk result_chunk; + if (planned_chunk.hasChunkInfo()) + result_chunk = DB::Squashing::squash(std::move(planned_chunk)); + + auto result = squashing.header.cloneWithColumns(result_chunk.getColumns()); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } @@ -1007,7 +1014,7 @@ void TCPHandler::processOrdinaryQuery() if (query_context->getSettingsRef().allow_experimental_query_deduplication) { - std::lock_guard lock(task_callback_mutex); + std::lock_guard lock(out_mutex); sendPartUUIDs(); } @@ -1017,13 +1024,13 @@ void TCPHandler::processOrdinaryQuery() if (header) { - std::lock_guard lock(task_callback_mutex); + std::lock_guard lock(out_mutex); sendData(header); } } /// Defer locking to cover a part of the scope below and everything after it - std::unique_lock progress_lock(task_callback_mutex, std::defer_lock); + std::unique_lock out_lock(out_mutex, std::defer_lock); { PullingAsyncPipelineExecutor executor(pipeline); @@ -1049,6 +1056,9 @@ void TCPHandler::processOrdinaryQuery() executor.cancelReading(); } + lock.unlock(); + out_lock.lock(); + if (after_send_progress.elapsed() / 1000 >= interactive_delay) { /// Some time passed and there is a progress. 
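The `lock.unlock(); out_lock.lock();` handoff above is the core of the new `out_mutex` scheme: the cancellation lock is dropped before any write to the output buffer, so the two mutexes are never held together while sending progress or data. A minimal, self-contained sketch of that defer-lock/handoff pattern with `std::unique_lock`; the mutex and function names are illustrative:

```cpp
#include <mutex>

std::mutex cancel_mutex;  // protects the cancellation flag (illustrative)
std::mutex out_mutex;     // protects the output stream (illustrative)

bool cancelled = false;

void send_progress() { /* write to the output stream */ }

void process_chunk()
{
    // Defer locking: construct the lock object without acquiring the mutex yet.
    std::unique_lock<std::mutex> out_lock(out_mutex, std::defer_lock);

    {
        std::unique_lock<std::mutex> lock(cancel_mutex);
        if (cancelled)
            return;

        // Hand off: drop the cancellation lock before touching the output,
        // so a writer never holds both mutexes at once.
        lock.unlock();
        out_lock.lock();

        send_progress();
        out_lock.unlock();
    }

    // Re-acquire later so the final writes are covered as well.
    out_lock.lock();
    send_progress();
}
```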
@@ -1064,12 +1074,14 @@ void TCPHandler::processOrdinaryQuery() if (!state.io.null_format) sendData(block); } + + out_lock.unlock(); } /// This lock wasn't acquired before and we make .lock() call here /// so everything under this line is covered even together /// with sendProgress() out of the scope - progress_lock.lock(); + out_lock.lock(); /** If data has run out, we will send the profiling data and total values to * the last zero block to be able to use diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 75e36836b63..74afb5a14a5 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -19,6 +19,7 @@ #include #include +#include "Core/Types.h" #include "IServer.h" #include "Interpreters/AsynchronousInsertQueue.h" #include "Server/TCPProtocolStackData.h" @@ -225,8 +226,13 @@ private: std::optional nonce; String cluster; + /// `out_mutex` protects `out` (WriteBuffer). + /// So it is used for method sendData(), sendProgress(), sendLogs(), etc. + std::mutex out_mutex; + /// `task_callback_mutex` protects tasks callbacks. + /// Inside these callbacks we might also change cancellation status, + /// so it also protects cancellation status checks. std::mutex task_callback_mutex; - std::mutex fatal_error_mutex; /// At the moment, only one ongoing query in the connection is supported at a time. QueryState state; diff --git a/src/Server/TLSHandler.cpp b/src/Server/TLSHandler.cpp new file mode 100644 index 00000000000..b0ed342c251 --- /dev/null +++ b/src/Server/TLSHandler.cpp @@ -0,0 +1,118 @@ +#include + +#include +#include + + +#if USE_SSL +# include +# include +# include +#endif + +#if !defined(USE_SSL) || USE_SSL == 0 +namespace ErrorCodes +{ + extern const int SUPPORT_IS_DISABLED; +} +#endif + +DB::TLSHandler::TLSHandler( + const StreamSocket & socket, + [[maybe_unused]] const LayeredConfiguration & config_, + [[maybe_unused]] const std::string & prefix_, + TCPProtocolStackData & stack_data_) + : Poco::Net::TCPServerConnection(socket) +#if USE_SSL + , config(config_) + , prefix(prefix_) +#endif + , stack_data(stack_data_) +{ +#if USE_SSL + params.privateKeyFile = config.getString(prefix + SSLManager::CFG_PRIV_KEY_FILE, ""); + params.certificateFile = config.getString(prefix + SSLManager::CFG_CERTIFICATE_FILE, params.privateKeyFile); + if (!params.privateKeyFile.empty() && !params.certificateFile.empty()) + { + // for backwards compatibility + auto ctx = SSLManager::instance().defaultServerContext(); + params.caLocation = config.getString(prefix + SSLManager::CFG_CA_LOCATION, ctx->getCAPaths().caLocation); + + // optional options for which we have defaults defined + params.verificationMode = SSLManager::VAL_VER_MODE; + if (config.hasProperty(prefix + SSLManager::CFG_VER_MODE)) + { + // either: none, relaxed, strict, once + std::string mode = config.getString(prefix + SSLManager::CFG_VER_MODE); + params.verificationMode = Poco::Net::Utility::convertVerificationMode(mode); + } + + params.verificationDepth = config.getInt(prefix + SSLManager::CFG_VER_DEPTH, SSLManager::VAL_VER_DEPTH); + params.loadDefaultCAs = config.getBool(prefix + SSLManager::CFG_ENABLE_DEFAULT_CA, SSLManager::VAL_ENABLE_DEFAULT_CA); + params.cipherList = config.getString(prefix + SSLManager::CFG_CIPHER_LIST, SSLManager::VAL_CIPHER_LIST); + params.cipherList = config.getString(prefix + SSLManager::CFG_CYPHER_LIST, params.cipherList); // for backwards compatibility + + bool require_tlsv1 = config.getBool(prefix + SSLManager::CFG_REQUIRE_TLSV1, false); + bool require_tlsv1_1 = config.getBool(prefix + 
SSLManager::CFG_REQUIRE_TLSV1_1, false); + bool require_tlsv1_2 = config.getBool(prefix + SSLManager::CFG_REQUIRE_TLSV1_2, false); + if (require_tlsv1_2) + usage = Context::TLSV1_2_SERVER_USE; + else if (require_tlsv1_1) + usage = Context::TLSV1_1_SERVER_USE; + else if (require_tlsv1) + usage = Context::TLSV1_SERVER_USE; + else + usage = Context::SERVER_USE; + + params.dhParamsFile = config.getString(prefix + SSLManager::CFG_DH_PARAMS_FILE, ""); + params.ecdhCurve = config.getString(prefix + SSLManager::CFG_ECDH_CURVE, ""); + + std::string disabled_protocols_list = config.getString(prefix + SSLManager::CFG_DISABLE_PROTOCOLS, ""); + Poco::StringTokenizer dp_tok(disabled_protocols_list, ";,", Poco::StringTokenizer::TOK_TRIM | Poco::StringTokenizer::TOK_IGNORE_EMPTY); + disabled_protocols = 0; + for (const auto & token : dp_tok) + { + if (token == "sslv2") + disabled_protocols |= Context::PROTO_SSLV2; + else if (token == "sslv3") + disabled_protocols |= Context::PROTO_SSLV3; + else if (token == "tlsv1") + disabled_protocols |= Context::PROTO_TLSV1; + else if (token == "tlsv1_1") + disabled_protocols |= Context::PROTO_TLSV1_1; + else if (token == "tlsv1_2") + disabled_protocols |= Context::PROTO_TLSV1_2; + } + + extended_verification = config.getBool(prefix + SSLManager::CFG_EXTENDED_VERIFICATION, false); + prefer_server_ciphers = config.getBool(prefix + SSLManager::CFG_PREFER_SERVER_CIPHERS, false); + } +#endif +} + + +void DB::TLSHandler::run() +{ +#if USE_SSL + auto ctx = SSLManager::instance().defaultServerContext(); + if (!params.privateKeyFile.empty() && !params.certificateFile.empty()) + { + ctx = SSLManager::instance().getCustomServerContext(prefix); + if (!ctx) + { + ctx = new Context(usage, params); + ctx->disableProtocols(disabled_protocols); + ctx->enableExtendedCertificateVerification(extended_verification); + if (prefer_server_ciphers) + ctx->preferServerCiphers(); + CertificateReloader::instance().tryLoad(config, ctx->sslContext(), prefix); + ctx = SSLManager::instance().setCustomServerContext(prefix, ctx); + } + } + socket() = SecureStreamSocket::attach(socket(), ctx); + stack_data.socket = socket(); + stack_data.certificate = params.certificateFile; +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); +#endif +} diff --git a/src/Server/TLSHandler.h b/src/Server/TLSHandler.h index dd025e3e165..2bec7380b08 100644 --- a/src/Server/TLSHandler.h +++ b/src/Server/TLSHandler.h @@ -1,9 +1,10 @@ #pragma once #include -#include -#include #include +#include + +#include "config.h" #if USE_SSL # include @@ -14,11 +15,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int SUPPORT_IS_DISABLED; -} - class TLSHandler : public Poco::Net::TCPServerConnection { #if USE_SSL @@ -27,30 +23,22 @@ class TLSHandler : public Poco::Net::TCPServerConnection using Context = Poco::Net::Context; #endif using StreamSocket = Poco::Net::StreamSocket; + using LayeredConfiguration = Poco::Util::LayeredConfiguration; public: - explicit TLSHandler(const StreamSocket & socket, const std::string & key_, const std::string & certificate_, TCPProtocolStackData & stack_data_) - : Poco::Net::TCPServerConnection(socket) - , key(key_) - , certificate(certificate_) - , stack_data(stack_data_) - {} + explicit TLSHandler(const StreamSocket & socket, const LayeredConfiguration & config_, const std::string & prefix_, TCPProtocolStackData & stack_data_); + + void run() override; - void run() override - { -#if USE_SSL 
- auto ctx = SSLManager::instance().defaultServerContext(); - if (!key.empty() && !certificate.empty()) - ctx = new Context(Context::Usage::SERVER_USE, key, certificate, ctx->getCAPaths().caLocation); - socket() = SecureStreamSocket::attach(socket(), ctx); - stack_data.socket = socket(); - stack_data.certificate = certificate; -#else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); -#endif - } private: - std::string key [[maybe_unused]]; - std::string certificate [[maybe_unused]]; +#if USE_SSL + Context::Params params [[maybe_unused]]; + Context::Usage usage [[maybe_unused]]; + int disabled_protocols = 0; + bool extended_verification = false; + bool prefer_server_ciphers = false; + const LayeredConfiguration & config [[maybe_unused]]; + std::string prefix [[maybe_unused]]; +#endif TCPProtocolStackData & stack_data [[maybe_unused]]; }; diff --git a/src/Server/TLSHandlerFactory.h b/src/Server/TLSHandlerFactory.h index 19602c7d25e..e8f3a1b7853 100644 --- a/src/Server/TLSHandlerFactory.h +++ b/src/Server/TLSHandlerFactory.h @@ -48,8 +48,8 @@ public: LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); return new TLSHandler( socket, - server.config().getString(conf_name + ".privateKeyFile", ""), - server.config().getString(conf_name + ".certificateFile", ""), + server.config(), + conf_name + ".", stack_data); } catch (const Poco::Net::NetException &) diff --git a/src/Server/grpc_protos/clickhouse_grpc.proto b/src/Server/grpc_protos/clickhouse_grpc.proto index c9ba6f28506..dc17570f833 100644 --- a/src/Server/grpc_protos/clickhouse_grpc.proto +++ b/src/Server/grpc_protos/clickhouse_grpc.proto @@ -90,6 +90,7 @@ message QueryInfo { string user_name = 9; string password = 10; string quota = 11; + string jwt = 25; // Works exactly like sessions in the HTTP protocol. string session_id = 12; diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 0d491067afc..35a5e95e643 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -1057,7 +1057,7 @@ bool AlterCommand::isRemovingProperty() const bool AlterCommand::isDropSomething() const { - return type == Type::DROP_COLUMN || type == Type::DROP_INDEX + return type == Type::DROP_COLUMN || type == Type::DROP_INDEX || type == Type::DROP_STATISTICS || type == Type::DROP_CONSTRAINT || type == Type::DROP_PROJECTION; } diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index c3aacfd67d3..57a1ea302f9 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -21,7 +21,7 @@ limitations under the License. */ #include #include #include -#include +#include #include #include #include @@ -626,7 +626,7 @@ QueryPipelineBuilder StorageLiveView::completeQuery(Pipes pipes) /// and two-level aggregation is triggered). 
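Both squashing call sites in this diff exist for the reason stated in the comment above: downstream consumers prefer a few large blocks over many tiny ones. A minimal sketch of the accumulate-until-threshold policy that `min_insert_block_size_rows` / `min_insert_block_size_bytes` control, independent of the ClickHouse classes (the `Accumulator` name and byte-vector payload are illustrative):

```cpp
#include <cstddef>
#include <vector>

// Buffers small batches and emits one combined batch once either
// threshold (rows or bytes) is reached.
class Accumulator
{
public:
    Accumulator(size_t min_rows_, size_t min_bytes_) : min_rows(min_rows_), min_bytes(min_bytes_) {}

    // Returns a non-empty batch when a threshold is met, otherwise keeps buffering.
    std::vector<char> add(const std::vector<char> & batch, size_t rows_in_batch)
    {
        buffer.insert(buffer.end(), batch.begin(), batch.end());
        rows += rows_in_batch;

        if (rows >= min_rows || buffer.size() >= min_bytes)
        {
            std::vector<char> result;
            result.swap(buffer);
            rows = 0;
            return result;
        }
        return {};
    }

    // Emits whatever is left at end of stream.
    std::vector<char> flush()
    {
        std::vector<char> result;
        result.swap(buffer);
        rows = 0;
        return result;
    }

private:
    size_t min_rows;
    size_t min_bytes;
    size_t rows = 0;
    std::vector<char> buffer;
};
```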
builder.addSimpleTransform([&](const Block & cur_header) { - return std::make_shared( + return std::make_shared( cur_header, getContext()->getSettingsRef().min_insert_block_size_rows, getContext()->getSettingsRef().min_insert_block_size_bytes); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 4c8f1240cf5..e92a608eed0 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -737,10 +737,10 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks { /// Don't scare people with broken part error if (!isRetryableException(std::current_exception())) - LOG_ERROR(storage.log, "Part {} is broken and need manual correction", getDataPartStorage().getFullPath()); + LOG_ERROR(storage.log, "Part {} is broken and needs manual correction", getDataPartStorage().getFullPath()); // There could be conditions that data part to be loaded is broken, but some of meta infos are already written - // into meta data before exception, need to clean them all. + // into metadata before exception, need to clean them all. metadata_manager->deleteAll(/*include_projection*/ true); metadata_manager->assertAllDeleted(/*include_projection*/ true); throw; @@ -1577,7 +1577,7 @@ void IMergeTreeDataPart::loadColumns(bool require) if (getFileNameForColumn(column)) loaded_columns.push_back(column); - if (columns.empty()) + if (loaded_columns.empty()) throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No columns in part {}", name); if (!is_readonly_storage) diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index f8cf19120c7..7e4b1db4c89 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -689,6 +690,11 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown( return *res; } +const std::unordered_map KeyCondition::space_filling_curve_name_to_type { + {"mortonEncode", SpaceFillingCurveType::Morton}, + {"hilbertEncode", SpaceFillingCurveType::Hilbert} +}; + ActionsDAGPtr KeyCondition::cloneASTWithInversionPushDown(ActionsDAG::NodeRawConstPtrs nodes, const ContextPtr & context) { auto res = std::make_shared(); @@ -744,16 +750,17 @@ static NameSet getAllSubexpressionNames(const ExpressionActions & key_expr) void KeyCondition::getAllSpaceFillingCurves() { - /// So far the only supported function is mortonEncode (Morton curve). + /// So far the only supported function is mortonEncode and hilbertEncode (Morton and Hilbert curves). 
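The KeyCondition change above generalises the Morton-only analysis by keying it on a name-to-curve-type map and carrying the resolved type in each `SpaceFillingCurveDescription`. A small self-contained analogue of that lookup (the diff's `space_filling_curve_name_to_type`), with the template arguments spelled out here as an assumption since the map's value type is the enum added in KeyCondition.h:

```cpp
#include <string>
#include <unordered_map>

enum class SpaceFillingCurveType
{
    Unknown = 0,
    Morton,
    Hilbert,
};

// Function name as it appears in the sorting key expression -> curve kind.
static const std::unordered_map<std::string, SpaceFillingCurveType> curve_name_to_type
{
    {"mortonEncode", SpaceFillingCurveType::Morton},
    {"hilbertEncode", SpaceFillingCurveType::Hilbert},
};

SpaceFillingCurveType curveTypeForFunction(const std::string & function_name)
{
    auto it = curve_name_to_type.find(function_name);
    return it == curve_name_to_type.end() ? SpaceFillingCurveType::Unknown : it->second;
}
```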
for (const auto & action : key_expr->getActions()) { if (action.node->type == ActionsDAG::ActionType::FUNCTION && action.node->children.size() >= 2 - && action.node->function_base->getName() == "mortonEncode") + && space_filling_curve_name_to_type.contains(action.node->function_base->getName())) { SpaceFillingCurveDescription curve; curve.function_name = action.node->function_base->getName(); + curve.type = space_filling_curve_name_to_type.at(curve.function_name); curve.key_column_pos = key_columns.at(action.node->result_name); for (const auto & child : action.node->children) { @@ -2665,6 +2672,15 @@ BoolMask KeyCondition::checkInHyperrectangle( const DataTypes & data_types) const { std::vector rpn_stack; + + auto curve_type = [&](size_t key_column_pos) + { + for (const auto & curve : key_space_filling_curves) + if (curve.key_column_pos == key_column_pos) + return curve.type; + return SpaceFillingCurveType::Unknown; + }; + for (const auto & element : rpn) { if (element.argument_num_of_space_filling_curve.has_value()) @@ -2764,26 +2780,43 @@ BoolMask KeyCondition::checkInHyperrectangle( UInt64 right = key_range.right.get(); BoolMask mask(false, true); - mortonIntervalToHyperrectangles<2>(left, right, - [&](std::array, 2> morton_hyperrectangle) + auto hyperrectangle_intersection_callback = [&](std::array, 2> curve_hyperrectangle) + { + BoolMask current_intersection(true, false); + for (size_t dim = 0; dim < num_dimensions; ++dim) { - BoolMask current_intersection(true, false); - for (size_t dim = 0; dim < num_dimensions; ++dim) - { - const Range & condition_arg_range = element.space_filling_curve_args_hyperrectangle[dim]; + const Range & condition_arg_range = element.space_filling_curve_args_hyperrectangle[dim]; - const Range morton_arg_range( - morton_hyperrectangle[dim].first, true, - morton_hyperrectangle[dim].second, true); + const Range curve_arg_range( + curve_hyperrectangle[dim].first, true, + curve_hyperrectangle[dim].second, true); - bool intersects = condition_arg_range.intersectsRange(morton_arg_range); - bool contains = condition_arg_range.containsRange(morton_arg_range); + bool intersects = condition_arg_range.intersectsRange(curve_arg_range); + bool contains = condition_arg_range.containsRange(curve_arg_range); - current_intersection = current_intersection & BoolMask(intersects, !contains); - } + current_intersection = current_intersection & BoolMask(intersects, !contains); + } - mask = mask | current_intersection; - }); + mask = mask | current_intersection; + }; + + switch (curve_type(element.key_column)) + { + case SpaceFillingCurveType::Hilbert: + { + hilbertIntervalToHyperrectangles2D(left, right, hyperrectangle_intersection_callback); + break; + } + case SpaceFillingCurveType::Morton: + { + mortonIntervalToHyperrectangles<2>(left, right, hyperrectangle_intersection_callback); + break; + } + case SpaceFillingCurveType::Unknown: + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "curve_type is `Unknown`. 
It is a bug."); + } + } rpn_stack.emplace_back(mask); } diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index 2bc3b108e02..6e5956706aa 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -328,11 +328,20 @@ private: const NameSet key_subexpr_names; /// Space-filling curves in the key + enum class SpaceFillingCurveType + { + Unknown = 0, + Morton, + Hilbert + }; + static const std::unordered_map space_filling_curve_name_to_type; + struct SpaceFillingCurveDescription { size_t key_column_pos; String function_name; std::vector arguments; + SpaceFillingCurveType type; }; using SpaceFillingCurveDescriptions = std::vector; SpaceFillingCurveDescriptions key_space_filling_curves; diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 2db0c0af3d7..79efb0ca8b3 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -310,7 +310,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() auto table_id = storage.getStorageID(); task_context = Context::createCopy(storage.getContext()); - task_context->makeQueryContext(); + task_context->makeQueryContextForMerge(*storage.getSettings()); task_context->setCurrentQueryId(getQueryId()); task_context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MERGE); diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index a7070c80df9..be44177847c 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -165,7 +165,7 @@ void MergePlainMergeTreeTask::finish() ContextMutablePtr MergePlainMergeTreeTask::createTaskContext() const { auto context = Context::createCopy(storage.getContext()); - context->makeQueryContext(); + context->makeQueryContextForMerge(*storage.getSettings()); auto queryId = getQueryId(); context->setCurrentQueryId(queryId); context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MERGE); diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 56bd1181fef..7ab8fa2430a 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -531,9 +531,9 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const global_ctx->merge_list_element_ptr->columns_written = global_ctx->merging_columns.size(); global_ctx->merge_list_element_ptr->progress.store(ctx->column_sizes->keyColumnsWeight(), std::memory_order_relaxed); - ctx->rows_sources_write_buf->next(); - ctx->rows_sources_uncompressed_write_buf->next(); /// Ensure data has written to disk. 
+ ctx->rows_sources_write_buf->finalize(); + ctx->rows_sources_uncompressed_write_buf->finalize(); ctx->rows_sources_uncompressed_write_buf->finalize(); size_t rows_sources_count = ctx->rows_sources_write_buf->count(); diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 0e13d3aef62..56909d1b7a0 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -138,7 +138,7 @@ private: virtual ~IStage() = default; }; - /// By default this context is uninitialed, but some variables has to be set after construction, + /// By default this context is uninitialized, but some variables has to be set after construction, /// some variables are used in a process of execution /// Proper initialization is responsibility of the author struct GlobalRuntimeContext : public IStageRuntimeContext @@ -199,7 +199,7 @@ private: using GlobalRuntimeContextPtr = std::shared_ptr; - /// By default this context is uninitialed, but some variables has to be set after construction, + /// By default this context is uninitialized, but some variables has to be set after construction, /// some variables are used in a process of execution /// Proper initialization is responsibility of the author struct ExecuteAndFinalizeHorizontalPartRuntimeContext : public IStageRuntimeContext @@ -273,7 +273,7 @@ private: GlobalRuntimeContextPtr global_ctx; }; - /// By default this context is uninitialed, but some variables has to be set after construction, + /// By default this context is uninitialized, but some variables has to be set after construction, /// some variables are used in a process of execution /// Proper initialization is responsibility of the author struct VerticalMergeRuntimeContext : public IStageRuntimeContext @@ -348,7 +348,7 @@ private: GlobalRuntimeContextPtr global_ctx; }; - /// By default this context is uninitialed, but some variables has to be set after construction, + /// By default this context is uninitialized, but some variables has to be set after construction, /// some variables are used in a process of execution /// Proper initialization is responsibility of the author struct MergeProjectionsRuntimeContext : public IStageRuntimeContext diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 89f39c65517..fae2663f079 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1759,11 +1759,14 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional runner(getActivePartsLoadingThreadPool().get(), "ActiveParts"); + bool all_disks_are_readonly = true; for (size_t i = 0; i < disks.size(); ++i) { const auto & disk_ptr = disks[i]; if (disk_ptr->isBroken()) continue; + if (!disk_ptr->isReadOnly()) + all_disks_are_readonly = false; auto & disk_parts = parts_to_load_by_disk[i]; auto & unexpected_disk_parts = unexpected_parts_to_load_by_disk[i]; @@ -1916,7 +1919,6 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optionalrenameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes @@ -1961,7 +1963,8 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optionalactions->findInOutputs(additional_filter_info->column_name)); if (before_where) - filter_nodes.nodes.push_back(&before_where->findInOutputs(where_column_name)); + filter_nodes.nodes.push_back(&before_where->dag.findInOutputs(where_column_name)); return filter_nodes; } @@ -7111,8 +7114,8 @@ UInt64 
MergeTreeData::estimateNumberOfRowsToRead( query_context->getSettingsRef().max_threads); UInt64 total_rows = result_ptr->selected_rows; - if (query_info.limit > 0 && query_info.limit < total_rows) - total_rows = query_info.limit; + if (query_info.trivial_limit > 0 && query_info.trivial_limit < total_rows) + total_rows = query_info.trivial_limit; return total_rows; } @@ -8083,6 +8086,13 @@ void MergeTreeData::checkDropCommandDoesntAffectInProgressMutations(const AlterC throw_exception(mutation_name, "column", command.column_name); } } + else if (command.type == AlterCommand::DROP_STATISTICS) + { + for (const auto & stats_col1 : command.statistics_columns) + for (const auto & stats_col2 : mutation_command.statistics_columns) + if (stats_col1 == stats_col2) + throw_exception(mutation_name, "statistics", stats_col1); + } } } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index f33f4293023..b327480fa92 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -245,6 +245,8 @@ void MergeTreeDataPartChecksums::write(WriteBuffer & to) const writeBinaryLittleEndian(sum.uncompressed_hash, out); } } + + out.finalize(); } void MergeTreeDataPartChecksums::addFile(const String & file_name, UInt64 file_size, MergeTreeDataPartChecksum::uint128 file_hash) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 379c465a409..ee1a9b7f8ed 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -298,6 +298,11 @@ std::optional MergeTreeDataPartWide::getColumnModificationTime(const Str std::optional MergeTreeDataPartWide::getFileNameForColumn(const NameAndTypePair & column) const { std::optional filename; + + /// Fallback for the case when serializations was not loaded yet (called from loadColumns()) + if (getSerializations().empty()) + return getStreamNameForColumn(column, {}, DATA_FILE_EXTENSION, getDataPartStorage()); + getSerialization(column.name)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { if (!filename.has_value()) @@ -309,6 +314,7 @@ std::optional MergeTreeDataPartWide::getFileNameForColumn(const NameAndT filename = getStreamNameForColumn(column, substream_path, DATA_FILE_EXTENSION, getDataPartStorage()); } }); + return filename; } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 6ababefa530..1f8d6abebd2 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -81,6 +81,8 @@ struct Settings; M(UInt64, min_delay_to_mutate_ms, 10, "Min delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \ M(UInt64, max_delay_to_mutate_ms, 1000, "Max delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \ M(Bool, exclude_deleted_rows_for_part_size_in_merge, false, "Use an estimated source part size (excluding lightweight deleted rows) when selecting parts to merge", 0) \ + M(String, merge_workload, "", "Name of workload to be used to access resources for merges", 0) \ + M(String, mutation_workload, "", "Name of workload to be used to access resources for mutations", 0) \ \ /** Inserts settings. 
*/ \ M(UInt64, parts_to_delay_insert, 1000, "If table contains at least that many active parts in single partition, artificially slow down insert into table. Disabled if set to 0", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index b7dede3cb00..05751e0fa6f 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -54,6 +54,10 @@ void MergeTreeSink::onFinish() finishDelayedChunk(); } +void MergeTreeSink::onCancel() +{ +} + void MergeTreeSink::consume(Chunk chunk) { if (num_blocks_processed > 0) diff --git a/src/Storages/MergeTree/MergeTreeSink.h b/src/Storages/MergeTree/MergeTreeSink.h index 07ab3850df2..cf6715a3415 100644 --- a/src/Storages/MergeTree/MergeTreeSink.h +++ b/src/Storages/MergeTree/MergeTreeSink.h @@ -28,6 +28,7 @@ public: void consume(Chunk chunk) override; void onStart() override; void onFinish() override; + void onCancel() override; private: StorageMergeTree & storage; diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index 8d40658bb2c..4c96cbf2c97 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -204,7 +204,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() } task_context = Context::createCopy(storage.getContext()); - task_context->makeQueryContext(); + task_context->makeQueryContextForMutate(*storage.getSettings()); task_context->setCurrentQueryId(getQueryId()); task_context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MUTATION); diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 2fd02708421..20f387137e7 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -136,7 +136,7 @@ bool MutatePlainMergeTreeTask::executeStep() ContextMutablePtr MutatePlainMergeTreeTask::createTaskContext() const { auto context = Context::createCopy(storage.getContext()); - context->makeQueryContext(); + context->makeQueryContextForMutate(*storage.getSettings()); auto queryId = getQueryId(); context->setCurrentQueryId(queryId); context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MUTATION); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 54df68126f8..a552ee89aee 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -29,6 +29,7 @@ #include #include #include +#include namespace ProfileEvents @@ -1267,7 +1268,7 @@ private: ProjectionNameToItsBlocks projection_parts; std::move_iterator projection_parts_iterator; - std::vector projection_squashes; + std::vector projection_squashes; const ProjectionsDescription & projections; ExecutableTaskPtr merge_projection_parts_task_ptr; @@ -1286,7 +1287,7 @@ void PartMergerWriter::prepare() for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { // We split the materialization into multiple stages similar to the process of INSERT SELECT query. 
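The projection-writer changes that follow switch from the old block-returning squashing transform to the two-phase API used throughout this PR: `add()` only plans a squash (the returned chunk carries ChunkInfo when it is ready), and `Squashing::squash()` performs the actual merge afterwards. A minimal sketch of that plan-then-execute split on plain strings; `Plan`, `Planner` and `squash` here are illustrative, not the ClickHouse API:

```cpp
#include <cstddef>
#include <string>
#include <vector>

// Phase 1 result: which batches belong together. Nothing is merged yet.
struct Plan
{
    std::vector<std::string> parts;

    bool ready() const { return !parts.empty(); }  // analogue of hasChunkInfo()
};

class Planner
{
public:
    explicit Planner(size_t min_parts_) : min_parts(min_parts_) {}

    // Phase 1: plan. Cheap; a non-empty Plan means "squash me now".
    Plan add(std::string part)
    {
        pending.push_back(std::move(part));
        if (pending.size() < min_parts)
            return {};
        Plan plan;
        plan.parts.swap(pending);
        return plan;
    }

    // End of stream: whatever is buffered becomes the final plan.
    Plan flush()
    {
        Plan plan;
        plan.parts.swap(pending);
        return plan;
    }

private:
    size_t min_parts;
    std::vector<std::string> pending;
};

// Phase 2: squash. This is where the actual merge work happens,
// potentially on a different thread than the planning step.
std::string squash(Plan plan)
{
    std::string result;
    for (const auto & part : plan.parts)
        result += part;
    return result;
}
```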
- projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); + projection_squashes.emplace_back(ctx->updated_header, settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); } existing_rows_count = 0; @@ -1311,16 +1312,18 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; - Block projection_block; - { - ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); - projection_block = projection_squashes[i].add(projection.calculate(cur_block, ctx->context)); - } + ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); + Block block_to_squash = projection.calculate(cur_block, ctx->context); + projection_squashes[i].header = block_to_squash; + Chunk planned_chunk = projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()}); - if (projection_block) + if (planned_chunk.hasChunkInfo()) { + Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); + + auto result = block_to_squash.cloneWithColumns(projection_chunk.getColumns()); auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( - *ctx->data, ctx->log, projection_block, projection, ctx->new_data_part.get(), ++block_num); + *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); tmp_part.finalize(); tmp_part.part->getDataPartStorage().commitTransaction(); projection_parts[projection.name].emplace_back(std::move(tmp_part.part)); @@ -1338,12 +1341,15 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { const auto & projection = *ctx->projections_to_build[i]; - auto & projection_squash = projection_squashes[i]; - auto projection_block = projection_squash.add({}); - if (projection_block) + auto & projection_squash_plan = projection_squashes[i]; + auto planned_chunk = projection_squash_plan.flush(); + if (planned_chunk.hasChunkInfo()) { + Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); + + auto result = projection_squash_plan.header.cloneWithColumns(projection_chunk.getColumns()); auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( - *ctx->data, ctx->log, projection_block, projection, ctx->new_data_part.get(), ++block_num); + *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); temp_part.part->getDataPartStorage().commitTransaction(); projection_parts[projection.name].emplace_back(std::move(temp_part.part)); diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index 79c0e6ad262..5a84c6fd684 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -125,6 +125,7 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; +extern const int ALL_CONNECTION_TRIES_FAILED; } class ParallelReplicasReadingCoordinator::ImplInterface @@ -1025,7 +1026,11 @@ void ParallelReplicasReadingCoordinator::markReplicaAsUnavailable(size_t replica std::lock_guard lock(mutex); if (!pimpl) + { unavailable_nodes_registered_before_initialization.push_back(replica_number); + if (unavailable_nodes_registered_before_initialization.size() == replicas_count) + throw Exception(ErrorCodes::ALL_CONNECTION_TRIES_FAILED, 
"Can't connect to any replica chosen for query execution"); + } else pimpl->markReplicaAsUnavailable(replica_number); } diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h index 60343988f03..8b463fda395 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h @@ -34,7 +34,7 @@ private: void initialize(CoordinationMode mode); std::mutex mutex; - size_t replicas_count{0}; + const size_t replicas_count{0}; size_t mark_segment_size{0}; std::unique_ptr pimpl; ProgressCallback progress_callback; // store the callback only to bypass it to coordinator implementation diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index e30d63c343a..30ba95c46f0 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -2004,7 +2004,9 @@ MutationCommands ReplicatedMergeTreeQueue::getMutationCommands( MutationCommands commands; for (auto it = begin; it != end; ++it) { - chassert(mutation_pointer < it->second->entry->znode_name); + /// FIXME : This was supposed to be fixed after releasing 23.5 (it fails in Upgrade check) + /// but it's still present https://github.com/ClickHouse/ClickHouse/issues/65275 + /// chassert(mutation_pointer < it->second->entry->znode_name); mutation_ids.push_back(it->second->entry->znode_name); const auto & commands_from_entry = it->second->entry->commands; commands.insert(commands.end(), commands_from_entry.begin(), commands_from_entry.end()); diff --git a/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp index 8277a769a11..e2e7f238a5e 100644 --- a/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp @@ -136,7 +136,8 @@ WriteBufferFromHDFS::~WriteBufferFromHDFS() { try { - finalize(); + if (!canceled) + finalize(); } catch (...) { diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index d13aec4a4f6..f2f6eac333c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -50,56 +50,58 @@ void StorageObjectStorageSink::consume(Chunk chunk) void StorageObjectStorageSink::onCancel() { std::lock_guard lock(cancel_mutex); - finalize(); + cancelBuffers(); + releaseBuffers(); cancelled = true; } -void StorageObjectStorageSink::onException(std::exception_ptr exception) +void StorageObjectStorageSink::onException(std::exception_ptr) { std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization. - release(); - } + cancelBuffers(); + releaseBuffers(); } void StorageObjectStorageSink::onFinish() { std::lock_guard lock(cancel_mutex); - finalize(); + finalizeBuffers(); } -void StorageObjectStorageSink::finalize() +void StorageObjectStorageSink::finalizeBuffers() { if (!writer) return; try { - writer->finalize(); writer->flush(); + writer->finalize(); } catch (...) { /// Stop ParallelFormattingOutputFormat correctly. 
- release(); + releaseBuffers(); throw; } write_buf->finalize(); } -void StorageObjectStorageSink::release() +void StorageObjectStorageSink::releaseBuffers() { writer.reset(); write_buf.reset(); } +void StorageObjectStorageSink::cancelBuffers() +{ + if (writer) + writer->cancel(); + if (write_buf) + write_buf->cancel(); +} + PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index 45cf83d606f..e0081193686 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -35,8 +35,9 @@ private: bool cancelled = false; std::mutex cancel_mutex; - void finalize(); - void release(); + void finalizeBuffers(); + void releaseBuffers(); + void cancelBuffers(); }; class PartitionedStorageObjectStorageSink : public PartitionedSink diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 2fc6993369d..aef783fc3c4 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -193,21 +193,21 @@ Chunk StorageObjectStorageSource::generate() progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); const auto & object_info = reader.getObjectInfo(); - const auto & filename = object_info.getFileName(); - chassert(object_info.metadata); + const auto & filename = object_info->getFileName(); + chassert(object_info->metadata); VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( chunk, read_from_format_info.requested_virtual_columns, { - .path = getUniqueStoragePathIdentifier(*configuration, reader.getObjectInfo(), false), - .size = object_info.metadata->size_bytes, + .path = getUniqueStoragePathIdentifier(*configuration, *object_info, false), + .size = object_info->metadata->size_bytes, .filename = &filename, - .last_modified = object_info.metadata->last_modified + .last_modified = object_info->metadata->last_modified }); return chunk; } if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getObjectInfo(), total_rows_in_file); + addNumRowsToCache(*reader.getObjectInfo(), total_rows_in_file); total_rows_in_file = 0; @@ -517,24 +517,22 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::ne else ++it; } + + if (filter_dag) + { + std::vector paths; + paths.reserve(new_batch.size()); + for (const auto & object_info : new_batch) + paths.push_back(getUniqueStoragePathIdentifier(*configuration, *object_info, false)); + + VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); + + LOG_TEST(logger, "Filtered files: {} -> {}", paths.size(), new_batch.size()); + } } index = 0; - if (filter_dag) - { - std::vector paths; - paths.reserve(new_batch.size()); - for (const auto & object_info : new_batch) - { - chassert(object_info); - paths.push_back(getUniqueStoragePathIdentifier(*configuration, *object_info, false)); - } - - VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); - LOG_TEST(logger, "Filtered files: {} -> {}", paths.size(), new_batch.size()); - } - if (read_keys) read_keys->insert(read_keys->end(), new_batch.begin(), new_batch.end()); @@ -551,7 +549,12 @@ StorageObjectStorage::ObjectInfoPtr 
StorageObjectStorageSource::GlobIterator::ne } if (index >= object_infos.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Index out of bound for blob metadata"); + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Index out of bound for blob metadata. Index: {}, size: {}", + index, object_infos.size()); + } return object_infos[index++]; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index fd7c7aa7102..d93097d2636 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -15,7 +15,7 @@ class SchemaCache; class StorageObjectStorageSource : public SourceWithKeyCondition, WithContext { - friend class StorageS3QueueSource; + friend class ObjectStorageQueueSource; public: using Configuration = StorageObjectStorage::Configuration; using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; @@ -100,7 +100,7 @@ protected: PullingPipelineExecutor * operator->() { return reader.get(); } const PullingPipelineExecutor * operator->() const { return reader.get(); } - const ObjectInfo & getObjectInfo() const { return *object_info; } + ObjectInfoPtr getObjectInfo() const { return object_info; } const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } private: diff --git a/src/Storages/S3Queue/S3QueueIFileMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp similarity index 73% rename from src/Storages/S3Queue/S3QueueIFileMetadata.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp index 6c4089115d4..52ee0c9f8ed 100644 --- a/src/Storages/S3Queue/S3QueueIFileMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -11,8 +11,8 @@ namespace ProfileEvents { - extern const Event S3QueueProcessedFiles; - extern const Event S3QueueFailedFiles; + extern const Event ObjectStorageQueueProcessedFiles; + extern const Event ObjectStorageQueueFailedFiles; }; namespace DB @@ -35,33 +35,40 @@ namespace } } -void S3QueueIFileMetadata::FileStatus::onProcessing() +void ObjectStorageQueueIFileMetadata::FileStatus::setProcessingEndTime() +{ + processing_end_time = now(); +} + +void ObjectStorageQueueIFileMetadata::FileStatus::onProcessing() { state = FileStatus::State::Processing; processing_start_time = now(); } -void S3QueueIFileMetadata::FileStatus::onProcessed() +void ObjectStorageQueueIFileMetadata::FileStatus::onProcessed() { state = FileStatus::State::Processed; - processing_end_time = now(); + if (!processing_end_time) + setProcessingEndTime(); } -void S3QueueIFileMetadata::FileStatus::onFailed(const std::string & exception) +void ObjectStorageQueueIFileMetadata::FileStatus::onFailed(const std::string & exception) { state = FileStatus::State::Failed; - processing_end_time = now(); + if (!processing_end_time) + setProcessingEndTime(); std::lock_guard lock(last_exception_mutex); last_exception = exception; } -std::string S3QueueIFileMetadata::FileStatus::getException() const +std::string ObjectStorageQueueIFileMetadata::FileStatus::getException() const { std::lock_guard lock(last_exception_mutex); return last_exception; } -std::string S3QueueIFileMetadata::NodeMetadata::toString() const +std::string ObjectStorageQueueIFileMetadata::NodeMetadata::toString() const { Poco::JSON::Object json; json.set("file_path", file_path); @@ -76,7 +83,7 @@ std::string 
S3QueueIFileMetadata::NodeMetadata::toString() const return oss.str(); } -S3QueueIFileMetadata::NodeMetadata S3QueueIFileMetadata::NodeMetadata::fromString(const std::string & metadata_str) +ObjectStorageQueueIFileMetadata::NodeMetadata ObjectStorageQueueIFileMetadata::NodeMetadata::fromString(const std::string & metadata_str) { Poco::JSON::Parser parser; auto json = parser.parse(metadata_str).extract(); @@ -91,7 +98,7 @@ S3QueueIFileMetadata::NodeMetadata S3QueueIFileMetadata::NodeMetadata::fromStrin return metadata; } -S3QueueIFileMetadata::S3QueueIFileMetadata( +ObjectStorageQueueIFileMetadata::ObjectStorageQueueIFileMetadata( const std::string & path_, const std::string & processing_node_path_, const std::string & processed_node_path_, @@ -116,11 +123,18 @@ S3QueueIFileMetadata::S3QueueIFileMetadata( processed_node_path, processing_node_path, failed_node_path); } -S3QueueIFileMetadata::~S3QueueIFileMetadata() +ObjectStorageQueueIFileMetadata::~ObjectStorageQueueIFileMetadata() { if (processing_id_version.has_value()) { - file_status->onFailed("Uncaught exception"); + if (file_status->getException().empty()) + { + if (std::current_exception()) + file_status->onFailed(getCurrentExceptionMessage(true)); + else + file_status->onFailed("Unprocessed exception"); + } + LOG_TEST(log, "Removing processing node in destructor for file: {}", path); try { @@ -148,9 +162,9 @@ S3QueueIFileMetadata::~S3QueueIFileMetadata() } } -std::string S3QueueIFileMetadata::getNodeName(const std::string & path) +std::string ObjectStorageQueueIFileMetadata::getNodeName(const std::string & path) { - /// Since with are dealing with paths in s3 which can have "/", + /// Since with are dealing with paths in object storage which can have "/", /// we cannot create a zookeeper node with the name equal to path. /// Therefore we use a hash of the path as a node name. @@ -159,7 +173,7 @@ std::string S3QueueIFileMetadata::getNodeName(const std::string & path) return toString(path_hash.get64()); } -S3QueueIFileMetadata::NodeMetadata S3QueueIFileMetadata::createNodeMetadata( +ObjectStorageQueueIFileMetadata::NodeMetadata ObjectStorageQueueIFileMetadata::createNodeMetadata( const std::string & path, const std::string & exception, size_t retries) @@ -168,9 +182,9 @@ S3QueueIFileMetadata::NodeMetadata S3QueueIFileMetadata::createNodeMetadata( /// Since node name is just a hash we want to know to which file it corresponds, /// so we keep "file_path" in nodes data. - /// "last_processed_timestamp" is needed for TTL metadata nodes enabled by s3queue_tracked_file_ttl_sec. - /// "last_exception" is kept for introspection, should also be visible in system.s3queue_log if it is enabled. - /// "retries" is kept for retrying the processing enabled by s3queue_loading_retries. + /// "last_processed_timestamp" is needed for TTL metadata nodes enabled by tracked_file_ttl_sec. + /// "last_exception" is kept for introspection, should also be visible in system.s3(azure)queue_log if it is enabled. + /// "retries" is kept for retrying the processing enabled by loading_retries. NodeMetadata metadata; metadata.file_path = path; metadata.last_processed_timestamp = now(); @@ -179,7 +193,7 @@ S3QueueIFileMetadata::NodeMetadata S3QueueIFileMetadata::createNodeMetadata( return metadata; } -std::string S3QueueIFileMetadata::getProcessorInfo(const std::string & processor_id) +std::string ObjectStorageQueueIFileMetadata::getProcessorInfo(const std::string & processor_id) { /// Add information which will be useful for debugging just in case. 
Poco::JSON::Object json; @@ -192,7 +206,7 @@ std::string S3QueueIFileMetadata::getProcessorInfo(const std::string & processor return oss.str(); } -bool S3QueueIFileMetadata::setProcessing() +bool ObjectStorageQueueIFileMetadata::setProcessing() { auto state = file_status->state.load(); if (state == FileStatus::State::Processing @@ -221,13 +235,22 @@ bool S3QueueIFileMetadata::setProcessing() return success; } -void S3QueueIFileMetadata::setProcessed() +void ObjectStorageQueueIFileMetadata::setProcessed() { LOG_TRACE(log, "Setting file {} as processed (path: {})", path, processed_node_path); - ProfileEvents::increment(ProfileEvents::S3QueueProcessedFiles); + ProfileEvents::increment(ProfileEvents::ObjectStorageQueueProcessedFiles); file_status->onProcessed(); - setProcessedImpl(); + + try + { + setProcessedImpl(); + } + catch (...) + { + file_status->onFailed(getCurrentExceptionMessage(true)); + throw; + } processing_id.reset(); processing_id_version.reset(); @@ -235,18 +258,36 @@ void S3QueueIFileMetadata::setProcessed() LOG_TRACE(log, "Set file {} as processed (rows: {})", path, file_status->processed_rows); } -void S3QueueIFileMetadata::setFailed(const std::string & exception) +void ObjectStorageQueueIFileMetadata::setFailed(const std::string & exception_message, bool reduce_retry_count, bool overwrite_status) { - LOG_TRACE(log, "Setting file {} as failed (exception: {}, path: {})", path, exception, failed_node_path); + LOG_TRACE(log, "Setting file {} as failed (path: {}, reduce retry count: {}, exception: {})", + path, failed_node_path, reduce_retry_count, exception_message); - ProfileEvents::increment(ProfileEvents::S3QueueFailedFiles); - file_status->onFailed(exception); - node_metadata.last_exception = exception; + ProfileEvents::increment(ProfileEvents::ObjectStorageQueueFailedFiles); + if (overwrite_status || file_status->state != FileStatus::State::Failed) + file_status->onFailed(exception_message); - if (max_loading_retries == 0) - setFailedNonRetriable(); - else - setFailedRetriable(); + node_metadata.last_exception = exception_message; + + if (reduce_retry_count) + { + try + { + if (max_loading_retries == 0) + setFailedNonRetriable(); + else + setFailedRetriable(); + } + catch (...) + { + auto full_exception = fmt::format( + "First exception: {}, exception while setting file as failed: {}", + exception_message, getCurrentExceptionMessage(true)); + + file_status->onFailed(full_exception); + throw; + } + } processing_id.reset(); processing_id_version.reset(); @@ -254,7 +295,7 @@ void S3QueueIFileMetadata::setFailed(const std::string & exception) LOG_TRACE(log, "Set file {} as failed (rows: {})", path, file_status->processed_rows); } -void S3QueueIFileMetadata::setFailedNonRetriable() +void ObjectStorageQueueIFileMetadata::setFailedNonRetriable() { auto zk_client = getZooKeeper(); Coordination::Requests requests; @@ -285,7 +326,7 @@ void S3QueueIFileMetadata::setFailedNonRetriable() throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected error while setting file as failed: {}", code); } -void S3QueueIFileMetadata::setFailedRetriable() +void ObjectStorageQueueIFileMetadata::setFailedRetriable() { /// Instead of creating a persistent /failed/node_hash node /// we create a persistent /failed/node_hash.retriable node. @@ -296,19 +337,20 @@ void S3QueueIFileMetadata::setFailedRetriable() auto zk_client = getZooKeeper(); /// Extract the number of already done retries from node_hash.retriable node if it exists. 
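Since setFailed() above now takes two extra flags, here is a condensed, illustrative caller showing what they mean according to this hunk; the try/catch frame and the file_metadata variable are assumptions for the example, only the setFailed() signature and getCurrentExceptionMessage() come from the diff.

// reduce_retry_count: this failure consumes one of max_loading_retries
//                     (and may turn into a non-retriable /failed node).
// overwrite_status:   replace the exception on a FileStatus that is already Failed.
try
{
    // ... process the file ...
}
catch (...)
{
    file_metadata->setFailed(
        getCurrentExceptionMessage(true),
        /* reduce_retry_count */ true,
        /* overwrite_status */ false);
}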
+ Coordination::Requests requests; Coordination::Stat stat; std::string res; - if (zk_client->tryGet(retrieable_failed_node_path, res, &stat)) + bool has_failed_before = zk_client->tryGet(retrieable_failed_node_path, res, &stat); + if (has_failed_before) { auto failed_node_metadata = NodeMetadata::fromString(res); node_metadata.retries = failed_node_metadata.retries + 1; file_status->retries = node_metadata.retries; } - LOG_TRACE(log, "File `{}` failed to process, try {}/{}", - path, node_metadata.retries, max_loading_retries); + LOG_TRACE(log, "File `{}` failed to process, try {}/{}, retries node exists: {} (failed node path: {})", + path, node_metadata.retries, max_loading_retries, has_failed_before, failed_node_path); - Coordination::Requests requests; if (node_metadata.retries >= max_loading_retries) { /// File is no longer retriable. diff --git a/src/Storages/S3Queue/S3QueueIFileMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h similarity index 89% rename from src/Storages/S3Queue/S3QueueIFileMetadata.h rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h index e0b0d16cbcc..652b4742389 100644 --- a/src/Storages/S3Queue/S3QueueIFileMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h @@ -6,7 +6,7 @@ namespace DB { -class S3QueueIFileMetadata +class ObjectStorageQueueIFileMetadata { public: struct FileStatus @@ -19,6 +19,7 @@ public: None }; + void setProcessingEndTime(); void onProcessing(); void onProcessed(); void onFailed(const std::string & exception); @@ -41,7 +42,7 @@ public: }; using FileStatusPtr = std::shared_ptr; - explicit S3QueueIFileMetadata( + explicit ObjectStorageQueueIFileMetadata( const std::string & path_, const std::string & processing_node_path_, const std::string & processed_node_path_, @@ -50,17 +51,19 @@ public: size_t max_loading_retries_, LoggerPtr log_); - virtual ~S3QueueIFileMetadata(); + virtual ~ObjectStorageQueueIFileMetadata(); bool setProcessing(); void setProcessed(); - void setFailed(const std::string & exception); + void setFailed(const std::string & exception_message, bool reduce_retry_count, bool overwrite_status); virtual void setProcessedAtStartRequests( Coordination::Requests & requests, const zkutil::ZooKeeperPtr & zk_client) = 0; FileStatusPtr getFileStatus() { return file_status; } + const std::string & getPath() const { return path; } + size_t getMaxTries() const { return max_loading_retries; } struct NodeMetadata { @@ -92,7 +95,7 @@ protected: LoggerPtr log; /// processing node is ephemeral, so we cannot verify with it if - /// this node was created by a certain processor on a previous s3 queue processing stage, + /// this node was created by a certain processor on a previous processing stage, /// because we could get a session expired in between the stages /// and someone else could just create this processing node. 
/// Therefore we also create a persistent processing node diff --git a/src/Storages/S3Queue/S3QueueMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.cpp similarity index 71% rename from src/Storages/S3Queue/S3QueueMetadata.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.cpp index e828e9f0716..23ac92b667a 100644 --- a/src/Storages/S3Queue/S3QueueMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.cpp @@ -4,13 +4,12 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -22,13 +21,8 @@ namespace ProfileEvents { - extern const Event S3QueueSetFileProcessingMicroseconds; - extern const Event S3QueueSetFileProcessedMicroseconds; - extern const Event S3QueueSetFileFailedMicroseconds; - extern const Event S3QueueFailedFiles; - extern const Event S3QueueProcessedFiles; - extern const Event S3QueueCleanupMaxSetSizeOrTTLMicroseconds; - extern const Event S3QueueLockLocalFileStatusesMicroseconds; + extern const Event ObjectStorageQueueCleanupMaxSetSizeOrTTLMicroseconds; + extern const Event ObjectStorageQueueLockLocalFileStatusesMicroseconds; }; namespace DB @@ -63,7 +57,7 @@ namespace } } -class S3QueueMetadata::LocalFileStatuses +class ObjectStorageQueueMetadata::LocalFileStatuses { public: LocalFileStatuses() = default; @@ -109,95 +103,89 @@ private: std::unique_lock lock() const { - auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueueLockLocalFileStatusesMicroseconds); + auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::ObjectStorageQueueLockLocalFileStatusesMicroseconds); return std::unique_lock(mutex); } }; -S3QueueMetadata::S3QueueMetadata(const fs::path & zookeeper_path_, const S3QueueSettings & settings_) +ObjectStorageQueueMetadata::ObjectStorageQueueMetadata(const fs::path & zookeeper_path_, const ObjectStorageQueueSettings & settings_) : settings(settings_) , zookeeper_path(zookeeper_path_) , buckets_num(getBucketsNum(settings_)) - , log(getLogger("StorageS3Queue(" + zookeeper_path_.string() + ")")) + , log(getLogger("StorageObjectStorageQueue(" + zookeeper_path_.string() + ")")) , local_file_statuses(std::make_shared()) { - if (settings.mode == S3QueueMode::UNORDERED - && (settings.s3queue_tracked_files_limit || settings.s3queue_tracked_file_ttl_sec)) + if (settings.mode == ObjectStorageQueueMode::UNORDERED + && (settings.tracked_files_limit || settings.tracked_file_ttl_sec)) { task = Context::getGlobalContextInstance()->getSchedulePool().createTask( - "S3QueueCleanupFunc", + "ObjectStorageQueueCleanupFunc", [this] { cleanupThreadFunc(); }); task->activate(); task->scheduleAfter( generateRescheduleInterval( - settings.s3queue_cleanup_interval_min_ms, settings.s3queue_cleanup_interval_max_ms)); + settings.cleanup_interval_min_ms, settings.cleanup_interval_max_ms)); } + LOG_TRACE(log, "Mode: {}, buckets: {}, processing threads: {}, result buckets num: {}", + settings.mode.toString(), settings.buckets, settings.processing_threads_num, buckets_num); + } -S3QueueMetadata::~S3QueueMetadata() +ObjectStorageQueueMetadata::~ObjectStorageQueueMetadata() { shutdown(); } -void S3QueueMetadata::shutdown() +void ObjectStorageQueueMetadata::shutdown() { shutdown_called = true; if (task) task->deactivate(); } -void S3QueueMetadata::checkSettings(const S3QueueSettings & settings_) const +void ObjectStorageQueueMetadata::checkSettings(const 
ObjectStorageQueueSettings & settings_) const { - S3QueueTableMetadata::checkEquals(settings, settings_); + ObjectStorageQueueTableMetadata::checkEquals(settings, settings_); } -S3QueueMetadata::FileStatusPtr S3QueueMetadata::getFileStatus(const std::string & path) +ObjectStorageQueueMetadata::FileStatusPtr ObjectStorageQueueMetadata::getFileStatus(const std::string & path) { return local_file_statuses->get(path, /* create */false); } -S3QueueMetadata::FileStatuses S3QueueMetadata::getFileStatuses() const +ObjectStorageQueueMetadata::FileStatuses ObjectStorageQueueMetadata::getFileStatuses() const { return local_file_statuses->getAll(); } -S3QueueMetadata::FileMetadataPtr S3QueueMetadata::getFileMetadata( +ObjectStorageQueueMetadata::FileMetadataPtr ObjectStorageQueueMetadata::getFileMetadata( const std::string & path, - S3QueueOrderedFileMetadata::BucketInfoPtr bucket_info) + ObjectStorageQueueOrderedFileMetadata::BucketInfoPtr bucket_info) { auto file_status = local_file_statuses->get(path, /* create */true); switch (settings.mode.value) { - case S3QueueMode::ORDERED: - return std::make_shared( + case ObjectStorageQueueMode::ORDERED: + return std::make_shared( zookeeper_path, path, file_status, bucket_info, buckets_num, - settings.s3queue_loading_retries, + settings.loading_retries, log); - case S3QueueMode::UNORDERED: - return std::make_shared( + case ObjectStorageQueueMode::UNORDERED: + return std::make_shared( zookeeper_path, path, file_status, - settings.s3queue_loading_retries, + settings.loading_retries, log); } } -size_t S3QueueMetadata::getBucketsNum(const S3QueueSettings & settings) -{ - if (settings.s3queue_buckets) - return settings.s3queue_buckets; - if (settings.s3queue_processing_threads_num) - return settings.s3queue_processing_threads_num; - return 0; -} - -size_t S3QueueMetadata::getBucketsNum(const S3QueueTableMetadata & settings) +size_t ObjectStorageQueueMetadata::getBucketsNum(const ObjectStorageQueueSettings & settings) { if (settings.buckets) return settings.buckets; @@ -206,32 +194,41 @@ size_t S3QueueMetadata::getBucketsNum(const S3QueueTableMetadata & settings) return 0; } -bool S3QueueMetadata::useBucketsForProcessing() const +size_t ObjectStorageQueueMetadata::getBucketsNum(const ObjectStorageQueueTableMetadata & settings) { - return settings.mode == S3QueueMode::ORDERED && (buckets_num > 1); + if (settings.buckets) + return settings.buckets; + if (settings.processing_threads_num) + return settings.processing_threads_num; + return 0; } -S3QueueMetadata::Bucket S3QueueMetadata::getBucketForPath(const std::string & path) const +bool ObjectStorageQueueMetadata::useBucketsForProcessing() const { - return S3QueueOrderedFileMetadata::getBucketForPath(path, buckets_num); + return settings.mode == ObjectStorageQueueMode::ORDERED && (buckets_num > 1); } -S3QueueOrderedFileMetadata::BucketHolderPtr -S3QueueMetadata::tryAcquireBucket(const Bucket & bucket, const Processor & processor) +ObjectStorageQueueMetadata::Bucket ObjectStorageQueueMetadata::getBucketForPath(const std::string & path) const { - return S3QueueOrderedFileMetadata::tryAcquireBucket(zookeeper_path, bucket, processor); + return ObjectStorageQueueOrderedFileMetadata::getBucketForPath(path, buckets_num); } -void S3QueueMetadata::initialize( +ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr +ObjectStorageQueueMetadata::tryAcquireBucket(const Bucket & bucket, const Processor & processor) +{ + return ObjectStorageQueueOrderedFileMetadata::tryAcquireBucket(zookeeper_path, bucket, processor, log); +} + 
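Between the helpers above and initialize() below, a short sketch of how they combine for Ordered-mode bucket routing, assembled only from logic shown in this diff (the buckets/processing_threads_num fallback, the ORDERED && buckets_num > 1 check, and sipHash64(path) % buckets_num); the metadata, path and processor_id variables are assumed for illustration.

// Sketch: how a file path is routed to a bucket in Ordered mode.
size_t buckets_num = settings.buckets ? settings.buckets : settings.processing_threads_num;
bool use_buckets = settings.mode == ObjectStorageQueueMode::ORDERED && buckets_num > 1;

if (use_buckets)
{
    auto bucket = sipHash64(path) % buckets_num;                     // getBucketForPath()
    auto holder = metadata.tryAcquireBucket(bucket, processor_id);   // empty if another processor holds it
    if (holder)
    {
        // Only the processor holding this bucket consumes files that hash into it,
        // which keeps per-bucket ordering while allowing parallelism across buckets.
    }
}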
+void ObjectStorageQueueMetadata::initialize( const ConfigurationPtr & configuration, const StorageInMemoryMetadata & storage_metadata) { - const auto metadata_from_table = S3QueueTableMetadata(*configuration, settings, storage_metadata); + const auto metadata_from_table = ObjectStorageQueueTableMetadata(*configuration, settings, storage_metadata); const auto & columns_from_table = storage_metadata.getColumns(); const auto table_metadata_path = zookeeper_path / "metadata"; - const auto metadata_paths = settings.mode == S3QueueMode::ORDERED - ? S3QueueOrderedFileMetadata::getMetadataPaths(buckets_num) - : S3QueueUnorderedFileMetadata::getMetadataPaths(); + const auto metadata_paths = settings.mode == ObjectStorageQueueMode::ORDERED + ? ObjectStorageQueueOrderedFileMetadata::getMetadataPaths(buckets_num) + : ObjectStorageQueueUnorderedFileMetadata::getMetadataPaths(); auto zookeeper = getZooKeeper(); zookeeper->createAncestors(zookeeper_path); @@ -240,7 +237,7 @@ void S3QueueMetadata::initialize( { if (zookeeper->exists(table_metadata_path)) { - const auto metadata_from_zk = S3QueueTableMetadata::parse(zookeeper->get(fs::path(zookeeper_path) / "metadata")); + const auto metadata_from_zk = ObjectStorageQueueTableMetadata::parse(zookeeper->get(fs::path(zookeeper_path) / "metadata")); const auto columns_from_zk = ColumnsDescription::parse(metadata_from_zk.columns); metadata_from_table.checkEquals(metadata_from_zk); @@ -265,8 +262,8 @@ void S3QueueMetadata::initialize( requests.emplace_back(zkutil::makeCreateRequest(zk_path, "", zkutil::CreateMode::Persistent)); } - if (!settings.s3queue_last_processed_path.value.empty()) - getFileMetadata(settings.s3queue_last_processed_path)->setProcessedAtStartRequests(requests, zookeeper); + if (!settings.last_processed_path.value.empty()) + getFileMetadata(settings.last_processed_path)->setProcessedAtStartRequests(requests, zookeeper); Coordination::Responses responses; auto code = zookeeper->tryMulti(requests, responses); @@ -290,10 +287,10 @@ void S3QueueMetadata::initialize( "of wrong zookeeper path or because of logical error"); } -void S3QueueMetadata::cleanupThreadFunc() +void ObjectStorageQueueMetadata::cleanupThreadFunc() { /// A background task is responsible for maintaining - /// settings.s3queue_tracked_files_limit and max_set_age settings for `unordered` processing mode. + /// settings.tracked_files_limit and max_set_age settings for `unordered` processing mode. 
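The task mentioned in the comment above is created in the ObjectStorageQueueMetadata constructor earlier in this hunk; a compact sketch of that wiring, using only names from the diff (the mode/limit checks and generateRescheduleInterval), with the surrounding object assumed.

// Sketch: the cleanup task only exists for Unordered mode with a retention limit configured.
bool needs_cleanup = settings.mode == ObjectStorageQueueMode::UNORDERED
    && (settings.tracked_files_limit || settings.tracked_file_ttl_sec);

if (needs_cleanup)
{
    // Each pass trims the tracked file nodes (under /processed and /failed) down to
    // tracked_files_limit, drops nodes older than tracked_file_ttl_sec, and then
    // reschedules itself with a delay between cleanup_interval_min_ms and cleanup_interval_max_ms.
    task->scheduleAfter(generateRescheduleInterval(
        settings.cleanup_interval_min_ms, settings.cleanup_interval_max_ms));
}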
if (shutdown_called) return; @@ -312,12 +309,12 @@ void S3QueueMetadata::cleanupThreadFunc() task->scheduleAfter( generateRescheduleInterval( - settings.s3queue_cleanup_interval_min_ms, settings.s3queue_cleanup_interval_max_ms)); + settings.cleanup_interval_min_ms, settings.cleanup_interval_max_ms)); } -void S3QueueMetadata::cleanupThreadFuncImpl() +void ObjectStorageQueueMetadata::cleanupThreadFuncImpl() { - auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueueCleanupMaxSetSizeOrTTLMicroseconds); + auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::ObjectStorageQueueCleanupMaxSetSizeOrTTLMicroseconds); const auto zk_client = getZooKeeper(); const fs::path zookeeper_processed_path = zookeeper_path / "processed"; const fs::path zookeeper_failed_path = zookeeper_path / "failed"; @@ -355,11 +352,11 @@ void S3QueueMetadata::cleanupThreadFuncImpl() return; } - chassert(settings.s3queue_tracked_files_limit || settings.s3queue_tracked_file_ttl_sec); - const bool check_nodes_limit = settings.s3queue_tracked_files_limit > 0; - const bool check_nodes_ttl = settings.s3queue_tracked_file_ttl_sec > 0; + chassert(settings.tracked_files_limit || settings.tracked_file_ttl_sec); + const bool check_nodes_limit = settings.tracked_files_limit > 0; + const bool check_nodes_ttl = settings.tracked_file_ttl_sec > 0; - const bool nodes_limit_exceeded = nodes_num > settings.s3queue_tracked_files_limit; + const bool nodes_limit_exceeded = nodes_num > settings.tracked_files_limit; if ((!nodes_limit_exceeded || !check_nodes_limit) && !check_nodes_ttl) { LOG_TEST(log, "No limit exceeded"); @@ -381,7 +378,7 @@ void S3QueueMetadata::cleanupThreadFuncImpl() struct Node { std::string zk_path; - S3QueueIFileMetadata::NodeMetadata metadata; + ObjectStorageQueueIFileMetadata::NodeMetadata metadata; }; auto node_cmp = [](const Node & a, const Node & b) { @@ -402,7 +399,7 @@ void S3QueueMetadata::cleanupThreadFuncImpl() std::string metadata_str; if (zk_client->tryGet(path, metadata_str)) { - sorted_nodes.emplace(path, S3QueueIFileMetadata::NodeMetadata::fromString(metadata_str)); + sorted_nodes.emplace(path, ObjectStorageQueueIFileMetadata::NodeMetadata::fromString(metadata_str)); LOG_TEST(log, "Fetched metadata for node {}", path); } else @@ -432,9 +429,9 @@ void S3QueueMetadata::cleanupThreadFuncImpl() wb << fmt::format("Node: {}, path: {}, timestamp: {};\n", node, metadata.file_path, metadata.last_processed_timestamp); return wb.str(); }; - LOG_TEST(log, "Checking node limits (max size: {}, max age: {}) for {}", settings.s3queue_tracked_files_limit, settings.s3queue_tracked_file_ttl_sec, get_nodes_str()); + LOG_TEST(log, "Checking node limits (max size: {}, max age: {}) for {}", settings.tracked_files_limit, settings.tracked_file_ttl_sec, get_nodes_str()); - size_t nodes_to_remove = check_nodes_limit && nodes_limit_exceeded ? nodes_num - settings.s3queue_tracked_files_limit : 0; + size_t nodes_to_remove = check_nodes_limit && nodes_limit_exceeded ? 
nodes_num - settings.tracked_files_limit : 0; for (const auto & node : sorted_nodes) { if (nodes_to_remove) @@ -453,7 +450,7 @@ void S3QueueMetadata::cleanupThreadFuncImpl() else if (check_nodes_ttl) { UInt64 node_age = getCurrentTime() - node.metadata.last_processed_timestamp; - if (node_age >= settings.s3queue_tracked_file_ttl_sec) + if (node_age >= settings.tracked_file_ttl_sec) { LOG_TRACE(log, "Removing node at path {} ({}) because file ttl is reached", node.metadata.file_path, node.zk_path); diff --git a/src/Storages/S3Queue/S3QueueMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.h similarity index 62% rename from src/Storages/S3Queue/S3QueueMetadata.h rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.h index 25d01fb52b9..05060931b5a 100644 --- a/src/Storages/S3Queue/S3QueueMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.h @@ -7,23 +7,23 @@ #include #include #include -#include -#include -#include +#include +#include +#include namespace fs = std::filesystem; namespace Poco { class Logger; } namespace DB { -struct S3QueueSettings; -class StorageS3Queue; -struct S3QueueTableMetadata; +struct ObjectStorageQueueSettings; +class StorageObjectStorageQueue; +struct ObjectStorageQueueTableMetadata; struct StorageInMemoryMetadata; using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; /** - * A class for managing S3Queue metadata in zookeeper, e.g. + * A class for managing ObjectStorageQueue metadata in zookeeper, e.g. * the following folders: * - /processed * - /processing @@ -35,7 +35,7 @@ using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; * - /processing * - /failed * - * Depending on S3Queue processing mode (ordered or unordered) + * Depending on ObjectStorageQueue processing mode (ordered or unordered) * we can differently store metadata in /processed node. * * Implements caching of zookeeper metadata for faster responses. @@ -44,24 +44,24 @@ using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; * In case of Unordered mode - if files TTL is enabled or maximum tracked files limit is set * starts a background cleanup thread which is responsible for maintaining them. */ -class S3QueueMetadata +class ObjectStorageQueueMetadata { public: - using FileStatus = S3QueueIFileMetadata::FileStatus; - using FileMetadataPtr = std::shared_ptr; + using FileStatus = ObjectStorageQueueIFileMetadata::FileStatus; + using FileMetadataPtr = std::shared_ptr; using FileStatusPtr = std::shared_ptr; using FileStatuses = std::unordered_map; using Bucket = size_t; using Processor = std::string; - S3QueueMetadata(const fs::path & zookeeper_path_, const S3QueueSettings & settings_); - ~S3QueueMetadata(); + ObjectStorageQueueMetadata(const fs::path & zookeeper_path_, const ObjectStorageQueueSettings & settings_); + ~ObjectStorageQueueMetadata(); void initialize(const ConfigurationPtr & configuration, const StorageInMemoryMetadata & storage_metadata); - void checkSettings(const S3QueueSettings & settings) const; + void checkSettings(const ObjectStorageQueueSettings & settings) const; void shutdown(); - FileMetadataPtr getFileMetadata(const std::string & path, S3QueueOrderedFileMetadata::BucketInfoPtr bucket_info = {}); + FileMetadataPtr getFileMetadata(const std::string & path, ObjectStorageQueueOrderedFileMetadata::BucketInfoPtr bucket_info = {}); FileStatusPtr getFileStatus(const std::string & path); FileStatuses getFileStatuses() const; @@ -69,16 +69,16 @@ public: /// Method of Ordered mode parallel processing. 
bool useBucketsForProcessing() const; Bucket getBucketForPath(const std::string & path) const; - S3QueueOrderedFileMetadata::BucketHolderPtr tryAcquireBucket(const Bucket & bucket, const Processor & processor); + ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr tryAcquireBucket(const Bucket & bucket, const Processor & processor); - static size_t getBucketsNum(const S3QueueSettings & settings); - static size_t getBucketsNum(const S3QueueTableMetadata & settings); + static size_t getBucketsNum(const ObjectStorageQueueSettings & settings); + static size_t getBucketsNum(const ObjectStorageQueueTableMetadata & settings); private: void cleanupThreadFunc(); void cleanupThreadFuncImpl(); - const S3QueueSettings settings; + const ObjectStorageQueueSettings settings; const fs::path zookeeper_path; const size_t buckets_num; diff --git a/src/Storages/S3Queue/S3QueueMetadataFactory.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.cpp similarity index 62% rename from src/Storages/S3Queue/S3QueueMetadataFactory.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.cpp index a319b21ca3e..ffae33d6f41 100644 --- a/src/Storages/S3Queue/S3QueueMetadataFactory.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.cpp @@ -1,4 +1,4 @@ -#include +#include #include namespace DB @@ -8,20 +8,20 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -S3QueueMetadataFactory & S3QueueMetadataFactory::instance() +ObjectStorageQueueMetadataFactory & ObjectStorageQueueMetadataFactory::instance() { - static S3QueueMetadataFactory ret; + static ObjectStorageQueueMetadataFactory ret; return ret; } -S3QueueMetadataFactory::FilesMetadataPtr -S3QueueMetadataFactory::getOrCreate(const std::string & zookeeper_path, const S3QueueSettings & settings) +ObjectStorageQueueMetadataFactory::FilesMetadataPtr +ObjectStorageQueueMetadataFactory::getOrCreate(const std::string & zookeeper_path, const ObjectStorageQueueSettings & settings) { std::lock_guard lock(mutex); auto it = metadata_by_path.find(zookeeper_path); if (it == metadata_by_path.end()) { - auto files_metadata = std::make_shared(zookeeper_path, settings); + auto files_metadata = std::make_shared(zookeeper_path, settings); it = metadata_by_path.emplace(zookeeper_path, std::move(files_metadata)).first; } else @@ -32,7 +32,7 @@ S3QueueMetadataFactory::getOrCreate(const std::string & zookeeper_path, const S3 return it->second.metadata; } -void S3QueueMetadataFactory::remove(const std::string & zookeeper_path) +void ObjectStorageQueueMetadataFactory::remove(const std::string & zookeeper_path) { std::lock_guard lock(mutex); auto it = metadata_by_path.find(zookeeper_path); @@ -57,9 +57,9 @@ void S3QueueMetadataFactory::remove(const std::string & zookeeper_path) } } -std::unordered_map S3QueueMetadataFactory::getAll() +std::unordered_map ObjectStorageQueueMetadataFactory::getAll() { - std::unordered_map result; + std::unordered_map result; for (const auto & [zk_path, metadata_and_ref_count] : metadata_by_path) result.emplace(zk_path, metadata_and_ref_count.metadata); return result; diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.h new file mode 100644 index 00000000000..a93f5ee3d83 --- /dev/null +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.h @@ -0,0 +1,37 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class ObjectStorageQueueMetadataFactory final : private 
boost::noncopyable +{ +public: + using FilesMetadataPtr = std::shared_ptr; + + static ObjectStorageQueueMetadataFactory & instance(); + + FilesMetadataPtr getOrCreate(const std::string & zookeeper_path, const ObjectStorageQueueSettings & settings); + + void remove(const std::string & zookeeper_path); + + std::unordered_map getAll(); + +private: + struct Metadata + { + explicit Metadata(std::shared_ptr metadata_) : metadata(metadata_), ref_count(1) {} + + std::shared_ptr metadata; + /// TODO: the ref count should be kept in keeper, because of the case with distributed processing. + size_t ref_count = 0; + }; + using MetadataByPath = std::unordered_map; + + MetadataByPath metadata_by_path; + std::mutex mutex; +}; + +} diff --git a/src/Storages/S3Queue/S3QueueOrderedFileMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.cpp similarity index 84% rename from src/Storages/S3Queue/S3QueueOrderedFileMetadata.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.cpp index bac87c95cc9..3b711a892c9 100644 --- a/src/Storages/S3Queue/S3QueueOrderedFileMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -16,7 +16,7 @@ namespace ErrorCodes namespace { - S3QueueOrderedFileMetadata::Bucket getBucketForPathImpl(const std::string & path, size_t buckets_num) + ObjectStorageQueueOrderedFileMetadata::Bucket getBucketForPathImpl(const std::string & path, size_t buckets_num) { return sipHash64(path) % buckets_num; } @@ -40,28 +40,32 @@ namespace } } -S3QueueOrderedFileMetadata::BucketHolder::BucketHolder( +ObjectStorageQueueOrderedFileMetadata::BucketHolder::BucketHolder( const Bucket & bucket_, int bucket_version_, const std::string & bucket_lock_path_, const std::string & bucket_lock_id_path_, - zkutil::ZooKeeperPtr zk_client_) + zkutil::ZooKeeperPtr zk_client_, + LoggerPtr log_) : bucket_info(std::make_shared(BucketInfo{ .bucket = bucket_, .bucket_version = bucket_version_, .bucket_lock_path = bucket_lock_path_, .bucket_lock_id_path = bucket_lock_id_path_})) , zk_client(zk_client_) + , log(log_) { } -void S3QueueOrderedFileMetadata::BucketHolder::release() +void ObjectStorageQueueOrderedFileMetadata::BucketHolder::release() { if (released) return; released = true; - LOG_TEST(getLogger("S3QueueBucketHolder"), "Releasing bucket {}", bucket_info->bucket); + + LOG_TEST(log, "Releasing bucket {}, version {}", + bucket_info->bucket, bucket_info->bucket_version); Coordination::Requests requests; /// Check that bucket lock version has not changed @@ -72,11 +76,24 @@ void S3QueueOrderedFileMetadata::BucketHolder::release() Coordination::Responses responses; const auto code = zk_client->tryMulti(requests, responses); + + if (code == Coordination::Error::ZOK) + LOG_TEST(log, "Released bucket {}, version {}", + bucket_info->bucket, bucket_info->bucket_version); + else + LOG_TRACE(log, + "Failed to release bucket {}, version {}: {}. 
" + "This is normal if keeper session expired.", + bucket_info->bucket, bucket_info->bucket_version, code); + zkutil::KeeperMultiException::check(code, requests, responses); } -S3QueueOrderedFileMetadata::BucketHolder::~BucketHolder() +ObjectStorageQueueOrderedFileMetadata::BucketHolder::~BucketHolder() { + if (!released) + LOG_TEST(log, "Releasing bucket ({}) holder in destructor", bucket_info->bucket); + try { release(); @@ -87,7 +104,7 @@ S3QueueOrderedFileMetadata::BucketHolder::~BucketHolder() } } -S3QueueOrderedFileMetadata::S3QueueOrderedFileMetadata( +ObjectStorageQueueOrderedFileMetadata::ObjectStorageQueueOrderedFileMetadata( const std::filesystem::path & zk_path_, const std::string & path_, FileStatusPtr file_status_, @@ -95,7 +112,7 @@ S3QueueOrderedFileMetadata::S3QueueOrderedFileMetadata( size_t buckets_num_, size_t max_loading_retries_, LoggerPtr log_) - : S3QueueIFileMetadata( + : ObjectStorageQueueIFileMetadata( path_, /* processing_node_path */zk_path_ / "processing" / getNodeName(path_), /* processed_node_path */getProcessedPath(zk_path_, path_, buckets_num_), @@ -109,7 +126,7 @@ S3QueueOrderedFileMetadata::S3QueueOrderedFileMetadata( { } -std::vector S3QueueOrderedFileMetadata::getMetadataPaths(size_t buckets_num) +std::vector ObjectStorageQueueOrderedFileMetadata::getMetadataPaths(size_t buckets_num) { if (buckets_num > 1) { @@ -122,7 +139,7 @@ std::vector S3QueueOrderedFileMetadata::getMetadataPaths(size_t buc return {"failed", "processing"}; } -bool S3QueueOrderedFileMetadata::getMaxProcessedFile( +bool ObjectStorageQueueOrderedFileMetadata::getMaxProcessedFile( NodeMetadata & result, Coordination::Stat * stat, const zkutil::ZooKeeperPtr & zk_client) @@ -130,7 +147,7 @@ bool S3QueueOrderedFileMetadata::getMaxProcessedFile( return getMaxProcessedFile(result, stat, processed_node_path, zk_client); } -bool S3QueueOrderedFileMetadata::getMaxProcessedFile( +bool ObjectStorageQueueOrderedFileMetadata::getMaxProcessedFile( NodeMetadata & result, Coordination::Stat * stat, const std::string & processed_node_path_, @@ -146,15 +163,16 @@ bool S3QueueOrderedFileMetadata::getMaxProcessedFile( return false; } -S3QueueOrderedFileMetadata::Bucket S3QueueOrderedFileMetadata::getBucketForPath(const std::string & path_, size_t buckets_num) +ObjectStorageQueueOrderedFileMetadata::Bucket ObjectStorageQueueOrderedFileMetadata::getBucketForPath(const std::string & path_, size_t buckets_num) { return getBucketForPathImpl(path_, buckets_num); } -S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueOrderedFileMetadata::tryAcquireBucket( +ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr ObjectStorageQueueOrderedFileMetadata::tryAcquireBucket( const std::filesystem::path & zk_path, const Bucket & bucket, - const Processor & processor) + const Processor & processor, + LoggerPtr log_) { const auto zk_client = getZooKeeper(); const auto bucket_lock_path = zk_path / "buckets" / toString(bucket) / "lock"; @@ -172,7 +190,7 @@ S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueOrderedFileMetadata::tryAcqui bucket_lock_id_path, processor_info, zkutil::CreateMode::Persistent, /* ignore_if_exists */true)); /// Update bucket lock id path. We use its version as a version of ephemeral bucket lock node. - /// (See comment near S3QueueIFileMetadata::processing_node_version). + /// (See comment near ObjectStorageQueueIFileMetadata::processing_node_version). 
requests.push_back(zkutil::makeSetRequest(bucket_lock_id_path, processor_info, -1)); Coordination::Responses responses; @@ -183,7 +201,7 @@ S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueOrderedFileMetadata::tryAcqui const auto bucket_lock_version = set_response->stat.version; LOG_TEST( - getLogger("S3QueueOrderedFileMetadata"), + log_, "Processor {} acquired bucket {} for processing (bucket lock version: {})", processor, bucket, bucket_lock_version); @@ -192,7 +210,8 @@ S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueOrderedFileMetadata::tryAcqui bucket_lock_version, bucket_lock_path, bucket_lock_id_path, - zk_client); + zk_client, + log_); } if (code == Coordination::Error::ZNODEEXISTS) @@ -204,7 +223,7 @@ S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueOrderedFileMetadata::tryAcqui throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected error: {}", code); } -std::pair S3QueueOrderedFileMetadata::setProcessingImpl() +std::pair ObjectStorageQueueOrderedFileMetadata::setProcessingImpl() { /// In one zookeeper transaction do the following: enum RequestType @@ -300,7 +319,7 @@ std::pair S3QueueOrderedFileMetad } } -void S3QueueOrderedFileMetadata::setProcessedAtStartRequests( +void ObjectStorageQueueOrderedFileMetadata::setProcessedAtStartRequests( Coordination::Requests & requests, const zkutil::ZooKeeperPtr & zk_client) { @@ -318,7 +337,7 @@ void S3QueueOrderedFileMetadata::setProcessedAtStartRequests( } } -void S3QueueOrderedFileMetadata::setProcessedRequests( +void ObjectStorageQueueOrderedFileMetadata::setProcessedRequests( Coordination::Requests & requests, const zkutil::ZooKeeperPtr & zk_client, const std::string & processed_node_path_, @@ -359,7 +378,7 @@ void S3QueueOrderedFileMetadata::setProcessedRequests( } } -void S3QueueOrderedFileMetadata::setProcessedImpl() +void ObjectStorageQueueOrderedFileMetadata::setProcessedImpl() { /// In one zookeeper transaction do the following: enum RequestType @@ -384,8 +403,11 @@ void S3QueueOrderedFileMetadata::setProcessedImpl() auto code = zk_client->tryMulti(requests, responses); if (code == Coordination::Error::ZOK) { - if (max_loading_retries) - zk_client->tryRemove(failed_node_path + ".retriable", -1); + if (max_loading_retries + && zk_client->tryRemove(failed_node_path + ".retriable", -1) == Coordination::Error::ZOK) + { + LOG_TEST(log, "Removed node {}.retriable", failed_node_path); + } return; } diff --git a/src/Storages/S3Queue/S3QueueOrderedFileMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.h similarity index 77% rename from src/Storages/S3Queue/S3QueueOrderedFileMetadata.h rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.h index 698ec0f54cc..9a997838f4d 100644 --- a/src/Storages/S3Queue/S3QueueOrderedFileMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.h @@ -1,5 +1,5 @@ #pragma once -#include +#include #include #include #include @@ -7,7 +7,7 @@ namespace DB { -class S3QueueOrderedFileMetadata : public S3QueueIFileMetadata +class ObjectStorageQueueOrderedFileMetadata : public ObjectStorageQueueIFileMetadata { public: using Processor = std::string; @@ -21,7 +21,7 @@ public: }; using BucketInfoPtr = std::shared_ptr; - explicit S3QueueOrderedFileMetadata( + explicit ObjectStorageQueueOrderedFileMetadata( const std::filesystem::path & zk_path_, const std::string & path_, FileStatusPtr file_status_, @@ -36,9 +36,10 @@ public: static BucketHolderPtr tryAcquireBucket( const std::filesystem::path & zk_path, const Bucket & bucket, 
- const Processor & processor); + const Processor & processor, + LoggerPtr log_); - static S3QueueOrderedFileMetadata::Bucket getBucketForPath(const std::string & path, size_t buckets_num); + static ObjectStorageQueueOrderedFileMetadata::Bucket getBucketForPath(const std::string & path, size_t buckets_num); static std::vector getMetadataPaths(size_t buckets_num); @@ -72,26 +73,32 @@ private: bool ignore_if_exists); }; -struct S3QueueOrderedFileMetadata::BucketHolder +struct ObjectStorageQueueOrderedFileMetadata::BucketHolder : private boost::noncopyable { BucketHolder( const Bucket & bucket_, int bucket_version_, const std::string & bucket_lock_path_, const std::string & bucket_lock_id_path_, - zkutil::ZooKeeperPtr zk_client_); + zkutil::ZooKeeperPtr zk_client_, + LoggerPtr log_); ~BucketHolder(); Bucket getBucket() const { return bucket_info->bucket; } BucketInfoPtr getBucketInfo() const { return bucket_info; } + void setFinished() { finished = true; } + bool isFinished() const { return finished; } + void release(); private: BucketInfoPtr bucket_info; const zkutil::ZooKeeperPtr zk_client; bool released = false; + bool finished = false; + LoggerPtr log; }; } diff --git a/src/Storages/S3Queue/S3QueueSettings.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.cpp similarity index 52% rename from src/Storages/S3Queue/S3QueueSettings.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.cpp index cb312adc5d9..67743db6197 100644 --- a/src/Storages/S3Queue/S3QueueSettings.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -13,14 +13,23 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; } -IMPLEMENT_SETTINGS_TRAITS(S3QueueSettingsTraits, LIST_OF_S3QUEUE_SETTINGS) +IMPLEMENT_SETTINGS_TRAITS(ObjectStorageQueueSettingsTraits, LIST_OF_OBJECT_STORAGE_QUEUE_SETTINGS) -void S3QueueSettings::loadFromQuery(ASTStorage & storage_def) +void ObjectStorageQueueSettings::loadFromQuery(ASTStorage & storage_def) { if (storage_def.settings) { try { + /// We support settings starting with s3_ for compatibility. + for (auto & change : storage_def.settings->changes) + { + if (change.name.starts_with("s3queue_")) + change.name = change.name.substr(std::strlen("s3queue_")); + if (change.name == "enable_logging_to_s3queue_log") + change.name = "enable_logging_to_queue_log"; + } + applyChanges(storage_def.settings->changes); } catch (Exception & e) diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h new file mode 100644 index 00000000000..ea008c2334e --- /dev/null +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h @@ -0,0 +1,51 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ +class ASTStorage; + + +#define OBJECT_STORAGE_QUEUE_RELATED_SETTINGS(M, ALIAS) \ + M(ObjectStorageQueueMode, \ + mode, \ + ObjectStorageQueueMode::ORDERED, \ + "With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKepeer." 
\ + "With ordered mode, only the max name of the successfully consumed file stored.", \ + 0) \ + M(ObjectStorageQueueAction, after_processing, ObjectStorageQueueAction::KEEP, "Delete or keep file in after successful processing", 0) \ + M(String, keeper_path, "", "Zookeeper node path", 0) \ + M(UInt32, loading_retries, 10, "Retry loading up to specified number of times", 0) \ + M(UInt32, processing_threads_num, 1, "Number of processing threads", 0) \ + M(UInt32, enable_logging_to_queue_log, 1, "Enable logging to system table system.(s3/azure_)queue_log", 0) \ + M(String, last_processed_path, "", "For Ordered mode. Files that have lexicographically smaller file name are considered already processed", 0) \ + M(UInt32, tracked_file_ttl_sec, 0, "Maximum number of seconds to store processed files in ZooKeeper node (store forever by default)", 0) \ + M(UInt32, polling_min_timeout_ms, 1000, "Minimal timeout before next polling", 0) \ + M(UInt32, polling_max_timeout_ms, 10000, "Maximum timeout before next polling", 0) \ + M(UInt32, polling_backoff_ms, 1000, "Polling backoff", 0) \ + M(UInt32, tracked_files_limit, 1000, "For unordered mode. Max set size for tracking processed files in ZooKeeper", 0) \ + M(UInt32, cleanup_interval_min_ms, 60000, "For unordered mode. Polling backoff min for cleanup", 0) \ + M(UInt32, cleanup_interval_max_ms, 60000, "For unordered mode. Polling backoff max for cleanup", 0) \ + M(UInt32, buckets, 0, "Number of buckets for Ordered mode parallel processing", 0) \ + M(UInt32, max_processed_files_before_commit, 100, "Number of files which can be processed before being committed to keeper", 0) \ + M(UInt32, max_processed_rows_before_commit, 0, "Number of rows which can be processed before being committed to keeper", 0) \ + M(UInt32, max_processed_bytes_before_commit, 0, "Number of bytes which can be processed before being committed to keeper", 0) \ + M(UInt32, max_processing_time_sec_before_commit, 0, "Timeout in seconds after which to commit files committed to keeper", 0) \ + +#define LIST_OF_OBJECT_STORAGE_QUEUE_SETTINGS(M, ALIAS) \ + OBJECT_STORAGE_QUEUE_RELATED_SETTINGS(M, ALIAS) \ + LIST_OF_ALL_FORMAT_SETTINGS(M, ALIAS) + +DECLARE_SETTINGS_TRAITS(ObjectStorageQueueSettingsTraits, LIST_OF_OBJECT_STORAGE_QUEUE_SETTINGS) + + +struct ObjectStorageQueueSettings : public BaseSettings +{ + void loadFromQuery(ASTStorage & storage_def); +}; + +} diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp similarity index 50% rename from src/Storages/S3Queue/S3QueueSource.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index b5b1a8dd992..2effbe7e7c2 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -5,20 +5,14 @@ #include #include #include -#include +#include #include #include -namespace CurrentMetrics -{ - extern const Metric StorageS3Threads; - extern const Metric StorageS3ThreadsActive; -} - namespace ProfileEvents { - extern const Event S3QueuePullMicroseconds; + extern const Event ObjectStorageQueuePullMicroseconds; } namespace DB @@ -26,25 +20,24 @@ namespace DB namespace ErrorCodes { - extern const int S3_ERROR; extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; } -StorageS3QueueSource::S3QueueObjectInfo::S3QueueObjectInfo( - const ObjectInfo & object_info, - Metadata::FileMetadataPtr processing_holder_) - : ObjectInfo(object_info.relative_path, object_info.metadata) - , 
processing_holder(processing_holder_) +ObjectStorageQueueSource::ObjectStorageQueueObjectInfo::ObjectStorageQueueObjectInfo( + const Source::ObjectInfo & object_info, + ObjectStorageQueueMetadata::FileMetadataPtr file_metadata_) + : Source::ObjectInfo(object_info.relative_path, object_info.metadata) + , file_metadata(file_metadata_) { } -StorageS3QueueSource::FileIterator::FileIterator( - std::shared_ptr metadata_, - std::unique_ptr glob_iterator_, +ObjectStorageQueueSource::FileIterator::FileIterator( + std::shared_ptr metadata_, + std::unique_ptr glob_iterator_, std::atomic & shutdown_called_, LoggerPtr logger_) - : StorageObjectStorageSource::IIterator("S3QueueIterator") + : StorageObjectStorageSource::IIterator("ObjectStorageQueueIterator") , metadata(metadata_) , glob_iterator(std::move(glob_iterator_)) , shutdown_called(shutdown_called_) @@ -52,25 +45,52 @@ StorageS3QueueSource::FileIterator::FileIterator( { } -size_t StorageS3QueueSource::FileIterator::estimatedKeysCount() +bool ObjectStorageQueueSource::FileIterator::isFinished() const +{ + LOG_TEST(log, "Iterator finished: {}, objects to retry: {}", iterator_finished, objects_to_retry.size()); + return iterator_finished + && std::all_of(listed_keys_cache.begin(), listed_keys_cache.end(), [](const auto & v) { return v.second.keys.empty(); }) + && objects_to_retry.empty(); +} + +size_t ObjectStorageQueueSource::FileIterator::estimatedKeysCount() { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method estimateKeysCount is not implemented"); } -StorageS3QueueSource::ObjectInfoPtr StorageS3QueueSource::FileIterator::nextImpl(size_t processor) +ObjectStorageQueueSource::Source::ObjectInfoPtr ObjectStorageQueueSource::FileIterator::nextImpl(size_t processor) { - ObjectInfoPtr object_info; - S3QueueOrderedFileMetadata::BucketInfoPtr bucket_info; + Source::ObjectInfoPtr object_info; + ObjectStorageQueueOrderedFileMetadata::BucketInfoPtr bucket_info; while (!shutdown_called) { if (metadata->useBucketsForProcessing()) + { + std::lock_guard lock(mutex); std::tie(object_info, bucket_info) = getNextKeyFromAcquiredBucket(processor); + } else - object_info = glob_iterator->next(processor); + { + std::lock_guard lock(mutex); + if (objects_to_retry.empty()) + { + object_info = glob_iterator->next(processor); + if (!object_info) + iterator_finished = true; + } + else + { + object_info = objects_to_retry.front(); + objects_to_retry.pop_front(); + } + } if (!object_info) + { + LOG_TEST(log, "No object left"); return {}; + } if (shutdown_called) { @@ -80,24 +100,69 @@ StorageS3QueueSource::ObjectInfoPtr StorageS3QueueSource::FileIterator::nextImpl auto file_metadata = metadata->getFileMetadata(object_info->relative_path, bucket_info); if (file_metadata->setProcessing()) - return std::make_shared(*object_info, file_metadata); + return std::make_shared(*object_info, file_metadata); } return {}; } -std::pair -StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processor) +void ObjectStorageQueueSource::FileIterator::returnForRetry(Source::ObjectInfoPtr object_info) { - /// We need this lock to maintain consistency between listing s3 directory - /// and getting/putting result into listed_keys_cache. 
- std::lock_guard lock(buckets_mutex); + chassert(object_info); + if (metadata->useBucketsForProcessing()) + { + const auto bucket = metadata->getBucketForPath(object_info->relative_path); + listed_keys_cache[bucket].keys.emplace_front(object_info); + } + else + { + objects_to_retry.push_back(object_info); + } +} + +void ObjectStorageQueueSource::FileIterator::releaseFinishedBuckets() +{ + for (const auto & [processor, holders] : bucket_holders) + { + LOG_TEST(log, "Releasing {} bucket holders for processor {}", holders.size(), processor); + + for (auto it = holders.begin(); it != holders.end(); ++it) + { + const auto & holder = *it; + const auto bucket = holder->getBucketInfo()->bucket; + if (!holder->isFinished()) + { + /// Only the last holder in the list of holders can be non-finished. + chassert(std::next(it) == holders.end()); + + /// Do not release non-finished bucket holder. We will continue processing it. + LOG_TEST(log, "Bucket {} is not finished yet, will not release it", bucket); + break; + } + + /// Release bucket lock. + holder->release(); + + /// Reset bucket processor in cached state. + auto cached_info = listed_keys_cache.find(bucket); + if (cached_info != listed_keys_cache.end()) + cached_info->second.processor.reset(); + } + } +} + +std::pair +ObjectStorageQueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processor) +{ + auto bucket_holder_it = bucket_holders.emplace(processor, std::vector{}).first; + BucketHolder * current_bucket_holder = bucket_holder_it->second.empty() || bucket_holder_it->second.back()->isFinished() + ? nullptr + : bucket_holder_it->second.back().get(); - auto bucket_holder_it = bucket_holders.emplace(processor, nullptr).first; auto current_processor = toString(processor); LOG_TEST( log, "Current processor: {}, acquired bucket: {}", - processor, bucket_holder_it->second ? toString(bucket_holder_it->second->getBucket()) : "None"); + processor, current_bucket_holder ? toString(current_bucket_holder->getBucket()) : "None"); while (true) { @@ -106,9 +171,9 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo /// In case it is already acquired, they put the key into listed_keys_cache, /// so that the thread who acquired the bucket will be able to see /// those keys without the need to list s3 directory once again. - if (bucket_holder_it->second) + if (current_bucket_holder) { - const auto bucket = bucket_holder_it->second->getBucket(); + const auto bucket = current_bucket_holder->getBucket(); auto it = listed_keys_cache.find(bucket); if (it != listed_keys_cache.end()) { @@ -141,7 +206,7 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo LOG_TEST(log, "Current bucket: {}, will process file: {}", bucket, object_info->getFileName()); - return std::pair{object_info, bucket_holder_it->second->getBucketInfo()}; + return std::pair{object_info, current_bucket_holder->getBucketInfo()}; } LOG_TEST(log, "Cache of bucket {} is empty", bucket); @@ -156,19 +221,21 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo if (iterator_finished) { - /// Bucket is fully processed - release the bucket. - bucket_holder_it->second->release(); - bucket_holder_it->second.reset(); + /// Bucket is fully processed, but we will release it later + /// - once we write and commit files via commit() method. 
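releaseFinishedBuckets() above keeps bucket holders alive until the batch is committed and only then releases the finished ones; the last holder in a processor's list may still be in use and is kept. A simplified standalone model of that lifecycle (BucketHolder and the container here are stand-ins, not the real types):

#include <cassert>
#include <cstddef>
#include <iterator>
#include <map>
#include <memory>
#include <vector>

struct BucketHolder
{
    size_t bucket = 0;
    bool finished = false;
    bool released = false;

    void setFinished() { finished = true; }
    void release() { released = true; }   // in the real code this drops the keeper lock
};

using HolderPtr = std::shared_ptr<BucketHolder>;

/// One processor may accumulate several holders; only the most recent one may be unfinished.
void releaseFinishedBuckets(std::map<size_t, std::vector<HolderPtr>> & bucket_holders)
{
    for (auto & [processor, holders] : bucket_holders)
    {
        (void)processor;   // only needed for logging in the real code
        for (auto it = holders.begin(); it != holders.end(); ++it)
        {
            if (!(*it)->finished)
            {
                assert(std::next(it) == holders.end());   // only the last holder may be unfinished
                break;                                    // keep it, processing will continue
            }
            (*it)->release();
        }
    }
}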
+ current_bucket_holder->setFinished(); } } /// If processing thread has already acquired some bucket - /// and while listing s3 directory gets a key which is in a different bucket, + /// and while listing object storage directory gets a key which is in a different bucket, /// it puts the key into listed_keys_cache to allow others to process it, /// because one processing thread can acquire only one bucket at a time. /// Once a thread is finished with its acquired bucket, it checks listed_keys_cache /// to see if there are keys from buckets not acquired by anyone. - if (!bucket_holder_it->second) + if (!current_bucket_holder) { + LOG_TEST(log, "Checking caches keys: {}", listed_keys_cache.size()); + for (auto it = listed_keys_cache.begin(); it != listed_keys_cache.end();) { auto & [bucket, bucket_info] = *it; @@ -193,8 +260,8 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo continue; } - bucket_holder_it->second = metadata->tryAcquireBucket(bucket, current_processor); - if (!bucket_holder_it->second) + auto acquired_bucket = metadata->tryAcquireBucket(bucket, current_processor); + if (!acquired_bucket) { LOG_TEST(log, "Bucket {} is already locked for processing (keys: {})", bucket, bucket_keys.size()); @@ -202,6 +269,9 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo continue; } + bucket_holder_it->second.push_back(acquired_bucket); + current_bucket_holder = bucket_holder_it->second.back().get(); + bucket_processor = current_processor; /// Take the key from the front, the order is important. @@ -211,7 +281,7 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo LOG_TEST(log, "Acquired bucket: {}, will process file: {}", bucket, object_info->getFileName()); - return std::pair{object_info, bucket_holder_it->second->getBucketInfo()}; + return std::pair{object_info, current_bucket_holder->getBucketInfo()}; } } @@ -229,12 +299,12 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo LOG_TEST(log, "Found next file: {}, bucket: {}, current bucket: {}, cached_keys: {}", object_info->getFileName(), bucket, - bucket_holder_it->second ? toString(bucket_holder_it->second->getBucket()) : "None", + current_bucket_holder ? toString(current_bucket_holder->getBucket()) : "None", bucket_cache.keys.size()); - if (bucket_holder_it->second) + if (current_bucket_holder) { - if (bucket_holder_it->second->getBucket() != bucket) + if (current_bucket_holder->getBucket() != bucket) { /// Acquired bucket differs from object's bucket, /// put it into bucket's cache and continue. @@ -242,13 +312,16 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo continue; } /// Bucket is already acquired, process the file. 
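The loop above routes listed keys through listed_keys_cache: a key that belongs to a bucket owned by another processor is parked in the cache so the bucket's owner can pick it up without re-listing the object storage. A condensed sketch of that routing decision, with simplified stand-in types:

#include <cstddef>
#include <deque>
#include <optional>
#include <string>
#include <unordered_map>

struct ListedKeys
{
    std::deque<std::string> keys;
    std::optional<std::string> processor;   // who currently owns this bucket, if anyone
};

using KeysCache = std::unordered_map<size_t /*bucket*/, ListedKeys>;

/// Return the key if it belongs to the bucket this processor has acquired,
/// otherwise park it in the cache for whichever processor owns that bucket.
std::optional<std::string> routeListedKey(
    KeysCache & cache,
    std::optional<size_t> acquired_bucket,
    size_t key_bucket,
    std::string key)
{
    if (acquired_bucket && *acquired_bucket == key_bucket)
        return key;

    cache[key_bucket].keys.push_back(std::move(key));
    return std::nullopt;
}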
- return std::pair{object_info, bucket_holder_it->second->getBucketInfo()}; + return std::pair{object_info, current_bucket_holder->getBucketInfo()}; } else { - bucket_holder_it->second = metadata->tryAcquireBucket(bucket, current_processor); - if (bucket_holder_it->second) + auto acquired_bucket = metadata->tryAcquireBucket(bucket, current_processor); + if (acquired_bucket) { + bucket_holder_it->second.push_back(acquired_bucket); + current_bucket_holder = bucket_holder_it->second.back().get(); + bucket_cache.processor = current_processor; if (!bucket_cache.keys.empty()) { @@ -258,7 +331,7 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo object_info = bucket_cache.keys.front(); bucket_cache.keys.pop_front(); } - return std::pair{object_info, bucket_holder_it->second->getBucketInfo()}; + return std::pair{object_info, current_bucket_holder->getBucketInfo()}; } else { @@ -270,12 +343,6 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo } else { - if (bucket_holder_it->second) - { - bucket_holder_it->second->release(); - bucket_holder_it->second.reset(); - } - LOG_TEST(log, "Reached the end of file iterator"); iterator_finished = true; @@ -287,21 +354,26 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo } } -StorageS3QueueSource::StorageS3QueueSource( +ObjectStorageQueueSource::ObjectStorageQueueSource( String name_, size_t processor_id_, const Block & header_, std::unique_ptr internal_source_, - std::shared_ptr files_metadata_, - const S3QueueAction & action_, + std::shared_ptr files_metadata_, + const ObjectStorageQueueAction & action_, RemoveFileFunc remove_file_func_, const NamesAndTypesList & requested_virtual_columns_, ContextPtr context_, const std::atomic & shutdown_called_, const std::atomic & table_is_being_dropped_, - std::shared_ptr s3_queue_log_, + std::shared_ptr system_queue_log_, const StorageID & storage_id_, - LoggerPtr log_) + LoggerPtr log_, + size_t max_processed_files_before_commit_, + size_t max_processed_rows_before_commit_, + size_t max_processed_bytes_before_commit_, + size_t max_processing_time_sec_before_commit_, + bool commit_once_processed_) : ISource(header_) , WithContext(context_) , name(std::move(name_)) @@ -312,41 +384,74 @@ StorageS3QueueSource::StorageS3QueueSource( , requested_virtual_columns(requested_virtual_columns_) , shutdown_called(shutdown_called_) , table_is_being_dropped(table_is_being_dropped_) - , s3_queue_log(s3_queue_log_) + , system_queue_log(system_queue_log_) , storage_id(storage_id_) + , max_processed_files_before_commit(max_processed_files_before_commit_) + , max_processed_rows_before_commit(max_processed_rows_before_commit_) + , max_processed_bytes_before_commit(max_processed_bytes_before_commit_) + , max_processing_time_sec_before_commit(max_processing_time_sec_before_commit_) + , commit_once_processed(commit_once_processed_) , remove_file_func(remove_file_func_) , log(log_) { } -String StorageS3QueueSource::getName() const +String ObjectStorageQueueSource::getName() const { return name; } -void StorageS3QueueSource::lazyInitialize(size_t processor) +void ObjectStorageQueueSource::lazyInitialize(size_t processor) { if (initialized) return; + LOG_TEST(log, "Initializing a new reader"); + internal_source->lazyInitialize(processor); reader = std::move(internal_source->reader); if (reader) reader_future = std::move(internal_source->reader_future); + initialized = true; } -Chunk StorageS3QueueSource::generate() +Chunk 
ObjectStorageQueueSource::generate() +{ + Chunk chunk; + try + { + chunk = generateImpl(); + } + catch (...) + { + if (commit_once_processed) + commit(false, getCurrentExceptionMessage(true)); + + throw; + } + + if (!chunk && commit_once_processed) + { + commit(true); + } + return chunk; +} + +Chunk ObjectStorageQueueSource::generateImpl() { lazyInitialize(processor_id); while (true) { if (!reader) + { + LOG_TEST(log, "No reader"); break; + } - const auto * object_info = dynamic_cast(&reader.getObjectInfo()); - auto file_metadata = object_info->processing_holder; + const auto * object_info = dynamic_cast(reader.getObjectInfo().get()); + auto file_metadata = object_info->file_metadata; auto file_status = file_metadata->getFileStatus(); if (isCancelled()) @@ -357,7 +462,7 @@ Chunk StorageS3QueueSource::generate() { try { - file_metadata->setFailed("Cancelled"); + file_metadata->setFailed("Cancelled", /* reduce_retry_count */true, /* overwrite_status */false); } catch (...) { @@ -365,16 +470,19 @@ Chunk StorageS3QueueSource::generate() object_info->relative_path, getCurrentExceptionMessage(true)); } - appendLogElement(reader.getObjectInfo().getPath(), *file_status, processed_rows_from_file, false); + appendLogElement(reader.getObjectInfo()->getPath(), *file_status, processed_rows_from_file, false); } + LOG_TEST(log, "Query is cancelled"); break; } - const auto & path = reader.getObjectInfo().getPath(); + const auto & path = reader.getObjectInfo()->getPath(); if (shutdown_called) { + LOG_TEST(log, "Shutdown called"); + if (processed_rows_from_file == 0) break; @@ -386,7 +494,7 @@ Chunk StorageS3QueueSource::generate() try { - file_metadata->setFailed("Table is dropped"); + file_metadata->setFailed("Table is dropped", /* reduce_retry_count */true, /* overwrite_status */false); } catch (...) { @@ -407,11 +515,11 @@ Chunk StorageS3QueueSource::generate() auto * prev_scope = CurrentThread::get().attachProfileCountersScope(&file_status->profile_counters); SCOPE_EXIT({ CurrentThread::get().attachProfileCountersScope(prev_scope); }); - /// FIXME: if files are compressed, profile counters update does not work fully (s3 related counters are not saved). Why? + /// FIXME: if files are compressed, profile counters update does not work fully (object storage related counters are not saved). Why? try { - auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueuePullMicroseconds); + auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::ObjectStorageQueuePullMicroseconds); Chunk chunk; if (reader->pull(chunk)) @@ -420,15 +528,16 @@ Chunk StorageS3QueueSource::generate() file_status->processed_rows += chunk.getNumRows(); processed_rows_from_file += chunk.getNumRows(); + total_processed_rows += chunk.getNumRows(); + total_processed_bytes += chunk.bytes(); VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( chunk, requested_virtual_columns, { .path = path, - .size = reader.getObjectInfo().metadata->size_bytes + .size = reader.getObjectInfo()->metadata->size_bytes }); - return chunk; } } @@ -437,22 +546,84 @@ Chunk StorageS3QueueSource::generate() const auto message = getCurrentExceptionMessage(true); LOG_ERROR(log, "Got an error while pulling chunk. Will set file {} as failed. 
Error: {} ", path, message); - file_metadata->setFailed(message); - + failed_during_read_files.push_back(file_metadata); + file_status->onFailed(getCurrentExceptionMessage(true)); appendLogElement(path, *file_status, processed_rows_from_file, false); + + if (processed_rows_from_file == 0) + { + auto * file_iterator = dynamic_cast(internal_source->file_iterator.get()); + chassert(file_iterator); + + if (file_status->retries < file_metadata->getMaxTries()) + file_iterator->returnForRetry(reader.getObjectInfo()); + + /// If we did not process any rows from the failed file, + /// commit all previously processed files, + /// not to lose the work already done. + return {}; + } + throw; } - file_metadata->setProcessed(); - applyActionAfterProcessing(reader.getObjectInfo().relative_path); - appendLogElement(path, *file_status, processed_rows_from_file, true); + + file_status->setProcessingEndTime(); file_status.reset(); + processed_rows_from_file = 0; + processed_files.push_back(file_metadata); + + if (processed_files.size() == max_processed_files_before_commit) + { + LOG_TRACE(log, "Number of max processed files before commit reached " + "(rows: {}, bytes: {}, files: {})", + total_processed_rows, total_processed_bytes, processed_files.size()); + break; + } + + bool rows_or_bytes_or_time_limit_reached = false; + if (max_processed_rows_before_commit + && total_processed_rows == max_processed_rows_before_commit) + { + LOG_TRACE(log, "Number of max processed rows before commit reached " + "(rows: {}, bytes: {}, files: {})", + total_processed_rows, total_processed_bytes, processed_files.size()); + + rows_or_bytes_or_time_limit_reached = true; + } + else if (max_processed_bytes_before_commit + && total_processed_bytes == max_processed_bytes_before_commit) + { + LOG_TRACE(log, "Number of max processed bytes before commit reached " + "(rows: {}, bytes: {}, files: {})", + total_processed_rows, total_processed_bytes, processed_files.size()); + + rows_or_bytes_or_time_limit_reached = true; + } + else if (max_processing_time_sec_before_commit + && total_stopwatch.elapsedSeconds() >= max_processing_time_sec_before_commit) + { + LOG_TRACE(log, "Max processing time before commit reached " + "(rows: {}, bytes: {}, files: {})", + total_processed_rows, total_processed_bytes, processed_files.size()); + + rows_or_bytes_or_time_limit_reached = true; + } + + if (rows_or_bytes_or_time_limit_reached) + { + if (!reader_future.valid()) + break; + + LOG_TRACE(log, "Rows or bytes limit reached, but we have one more file scheduled already, " + "will process it despite the limit"); + } if (shutdown_called) { - LOG_INFO(log, "Shutdown was called, stopping sync"); + LOG_TRACE(log, "Shutdown was called, stopping sync"); break; } @@ -460,46 +631,82 @@ Chunk StorageS3QueueSource::generate() reader = reader_future.get(); if (!reader) + { + LOG_TEST(log, "Reader finished"); break; + } - file_status = files_metadata->getFileStatus(reader.getObjectInfo().getPath()); + file_status = files_metadata->getFileStatus(reader.getObjectInfo()->getPath()); - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. - internal_source->create_reader_pool->wait(); - reader_future = internal_source->createReaderAsync(processor_id); + if (!rows_or_bytes_or_time_limit_reached && processed_files.size() + 1 < max_processed_files_before_commit) + { + /// Even if task is finished the thread may be not freed in pool. 
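generateImpl() above stops pulling new files once any of the max_processed_{files,rows,bytes}_before_commit or max_processing_time_sec_before_commit thresholds is reached, so the current batch can be committed to Keeper. A standalone sketch of those threshold checks (field names mirror the settings; the helper itself is illustrative, not the real implementation):

#include <chrono>
#include <cstddef>

struct CommitLimits
{
    size_t max_files = 100;      // max_processed_files_before_commit
    size_t max_rows = 0;         // 0 disables the limit, as in the settings above
    size_t max_bytes = 0;        // max_processed_bytes_before_commit
    size_t max_seconds = 0;      // max_processing_time_sec_before_commit
};

struct BatchProgress
{
    size_t files = 0;
    size_t rows = 0;
    size_t bytes = 0;
    std::chrono::steady_clock::time_point started = std::chrono::steady_clock::now();
};

bool shouldCommit(const CommitLimits & limits, const BatchProgress & p)
{
    const auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(
        std::chrono::steady_clock::now() - p.started).count();

    return (limits.max_files && p.files >= limits.max_files)
        || (limits.max_rows && p.rows >= limits.max_rows)
        || (limits.max_bytes && p.bytes >= limits.max_bytes)
        || (limits.max_seconds && static_cast<size_t>(elapsed) >= limits.max_seconds);
}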
+ /// So wait until it will be freed before scheduling a new task. + internal_source->create_reader_pool->wait(); + reader_future = internal_source->createReaderAsync(processor_id); + } } return {}; } -void StorageS3QueueSource::applyActionAfterProcessing(const String & path) +void ObjectStorageQueueSource::commit(bool success, const std::string & exception_message) +{ + LOG_TEST(log, "Having {} files to set as {}, failed files: {}", + processed_files.size(), success ? "Processed" : "Failed", failed_during_read_files.size()); + + for (const auto & file_metadata : processed_files) + { + if (success) + { + file_metadata->setProcessed(); + applyActionAfterProcessing(file_metadata->getPath()); + } + else + file_metadata->setFailed( + exception_message, + /* reduce_retry_count */false, + /* overwrite_status */true); + } + + for (const auto & file_metadata : failed_during_read_files) + { + /// `exception` from commit args is from insertion to storage. + /// Here we do not used it as failed_during_read_files were not inserted into storage, but skipped. + file_metadata->setFailed( + file_metadata->getFileStatus()->getException(), + /* reduce_retry_count */true, + /* overwrite_status */false); + } +} + +void ObjectStorageQueueSource::applyActionAfterProcessing(const String & path) { switch (action) { - case S3QueueAction::DELETE: + case ObjectStorageQueueAction::DELETE: { assert(remove_file_func); remove_file_func(path); break; } - case S3QueueAction::KEEP: + case ObjectStorageQueueAction::KEEP: break; } } -void StorageS3QueueSource::appendLogElement( +void ObjectStorageQueueSource::appendLogElement( const std::string & filename, - S3QueueMetadata::FileStatus & file_status_, + ObjectStorageQueueMetadata::FileStatus & file_status_, size_t processed_rows, bool processed) { - if (!s3_queue_log) + if (!system_queue_log) return; - S3QueueLogElement elem{}; + ObjectStorageQueueLogElement elem{}; { - elem = S3QueueLogElement + elem = ObjectStorageQueueLogElement { .event_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()), .database = storage_id.database_name, @@ -507,14 +714,14 @@ void StorageS3QueueSource::appendLogElement( .uuid = toString(storage_id.uuid), .file_name = filename, .rows_processed = processed_rows, - .status = processed ? S3QueueLogElement::S3QueueStatus::Processed : S3QueueLogElement::S3QueueStatus::Failed, + .status = processed ? 
ObjectStorageQueueLogElement::ObjectStorageQueueStatus::Processed : ObjectStorageQueueLogElement::ObjectStorageQueueStatus::Failed, .counters_snapshot = file_status_.profile_counters.getPartiallyAtomicSnapshot(), .processing_start_time = file_status_.processing_start_time, .processing_end_time = file_status_.processing_end_time, .exception = file_status_.getException(), }; } - s3_queue_log->add(std::move(elem)); + system_queue_log->add(std::move(elem)); } } diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h new file mode 100644 index 00000000000..50428ed5f4b --- /dev/null +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h @@ -0,0 +1,167 @@ +#pragma once +#include "config.h" + +#include +#include +#include +#include +#include +#include + + +namespace Poco { class Logger; } + +namespace DB +{ + +struct ObjectMetadata; + +class ObjectStorageQueueSource : public ISource, WithContext +{ +public: + using Storage = StorageObjectStorage; + using Source = StorageObjectStorageSource; + using RemoveFileFunc = std::function; + using BucketHolderPtr = ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr; + using BucketHolder = ObjectStorageQueueOrderedFileMetadata::BucketHolder; + + struct ObjectStorageQueueObjectInfo : public Source::ObjectInfo + { + ObjectStorageQueueObjectInfo( + const Source::ObjectInfo & object_info, + ObjectStorageQueueMetadata::FileMetadataPtr file_metadata_); + + ObjectStorageQueueMetadata::FileMetadataPtr file_metadata; + }; + + class FileIterator : public StorageObjectStorageSource::IIterator + { + public: + FileIterator( + std::shared_ptr metadata_, + std::unique_ptr glob_iterator_, + std::atomic & shutdown_called_, + LoggerPtr logger_); + + bool isFinished() const; + + /// Note: + /// List results in s3 are always returned in UTF-8 binary order. + /// (https://docs.aws.amazon.com/AmazonS3/latest/userguide/ListingKeysUsingAPIs.html) + Source::ObjectInfoPtr nextImpl(size_t processor) override; + + size_t estimatedKeysCount() override; + + /// If the key was taken from iterator via next() call, + /// we might later want to return it back for retrying. + void returnForRetry(Source::ObjectInfoPtr object_info); + + /// Release hold buckets. + /// In fact, they could be released in destructors of BucketHolder, + /// but we anyway try to release them explicitly, + /// because we want to be able to rethrow exceptions if they might happen. + void releaseFinishedBuckets(); + + private: + using Bucket = ObjectStorageQueueMetadata::Bucket; + using Processor = ObjectStorageQueueMetadata::Processor; + + const std::shared_ptr metadata; + const std::unique_ptr glob_iterator; + + std::atomic & shutdown_called; + std::mutex mutex; + LoggerPtr log; + + struct ListedKeys + { + std::deque keys; + std::optional processor; + }; + /// A cache of keys which were iterated via glob_iterator, but not taken for processing. + std::unordered_map listed_keys_cache; + + /// We store a vector of holders, because we cannot release them until processed files are committed. + std::unordered_map> bucket_holders; + + /// Is glob_iterator finished? + std::atomic_bool iterator_finished = false; + + /// Only for processing without buckets. 
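Taken together, the commit() implementation and the new header above describe a two-phase flow: files are only marked Processed or Failed in Keeper after the insert into the target storage has finished. A reduced model of that commit step (the types here are placeholders for the real metadata classes):

#include <string>
#include <vector>

enum class FileState { Processing, Processed, Failed };

struct FileMetadata
{
    std::string path;
    FileState state = FileState::Processing;
    std::string exception;
};

/// On success, every fully processed file is marked Processed (and may then be deleted,
/// depending on the `after_processing` setting); on failure they are all marked Failed
/// with the insertion error. Files that failed while being read keep their own error.
void commitBatch(
    std::vector<FileMetadata> & processed_files,
    std::vector<FileMetadata> & failed_during_read_files,
    bool success,
    const std::string & insert_exception)
{
    for (auto & file : processed_files)
    {
        file.state = success ? FileState::Processed : FileState::Failed;
        if (!success)
            file.exception = insert_exception;
    }
    for (auto & file : failed_during_read_files)
        file.state = FileState::Failed;   // keeps the exception captured during reading
}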
+ std::deque objects_to_retry; + + std::pair getNextKeyFromAcquiredBucket(size_t processor); + bool hasKeysForProcessor(const Processor & processor) const; + }; + + ObjectStorageQueueSource( + String name_, + size_t processor_id_, + const Block & header_, + std::unique_ptr internal_source_, + std::shared_ptr files_metadata_, + const ObjectStorageQueueAction & action_, + RemoveFileFunc remove_file_func_, + const NamesAndTypesList & requested_virtual_columns_, + ContextPtr context_, + const std::atomic & shutdown_called_, + const std::atomic & table_is_being_dropped_, + std::shared_ptr system_queue_log_, + const StorageID & storage_id_, + LoggerPtr log_, + size_t max_processed_files_before_commit_, + size_t max_processed_rows_before_commit_, + size_t max_processed_bytes_before_commit_, + size_t max_processing_time_sec_before_commit_, + bool commit_once_processed_); + + static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); + + String getName() const override; + + Chunk generate() override; + + /// Commit files after insertion into storage finished. + /// `success` defines whether insertion was successful or not. + void commit(bool success, const std::string & exception_message = {}); + +private: + const String name; + const size_t processor_id; + const ObjectStorageQueueAction action; + const std::shared_ptr files_metadata; + const std::shared_ptr internal_source; + const NamesAndTypesList requested_virtual_columns; + const std::atomic & shutdown_called; + const std::atomic & table_is_being_dropped; + const std::shared_ptr system_queue_log; + const StorageID storage_id; + const size_t max_processed_files_before_commit; + const size_t max_processed_rows_before_commit; + const size_t max_processed_bytes_before_commit; + const size_t max_processing_time_sec_before_commit; + const bool commit_once_processed; + + RemoveFileFunc remove_file_func; + LoggerPtr log; + + std::vector processed_files; + std::vector failed_during_read_files; + + Source::ReaderHolder reader; + std::future reader_future; + std::atomic initialized{false}; + + size_t processed_rows_from_file = 0; + size_t total_processed_rows = 0; + size_t total_processed_bytes = 0; + + Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; + + Chunk generateImpl(); + void applyActionAfterProcessing(const String & path); + void appendLogElement(const std::string & filename, ObjectStorageQueueMetadata::FileStatus & file_status_, size_t processed_rows, bool processed); + void lazyInitialize(size_t processor); +}; + +} diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.cpp similarity index 72% rename from src/Storages/S3Queue/S3QueueTableMetadata.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.cpp index ecaa7ad57cc..cb9cdf8e186 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.cpp @@ -3,9 +3,9 @@ #include #include #include -#include -#include -#include +#include +#include +#include #include @@ -20,33 +20,33 @@ namespace ErrorCodes namespace { - S3QueueMode modeFromString(const std::string & mode) + ObjectStorageQueueMode modeFromString(const std::string & mode) { if (mode == "ordered") - return S3QueueMode::ORDERED; + return ObjectStorageQueueMode::ORDERED; if (mode == "unordered") - return S3QueueMode::UNORDERED; - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected S3Queue mode: {}", mode); + return 
ObjectStorageQueueMode::UNORDERED; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected ObjectStorageQueue mode: {}", mode); } } -S3QueueTableMetadata::S3QueueTableMetadata( +ObjectStorageQueueTableMetadata::ObjectStorageQueueTableMetadata( const StorageObjectStorage::Configuration & configuration, - const S3QueueSettings & engine_settings, + const ObjectStorageQueueSettings & engine_settings, const StorageInMemoryMetadata & storage_metadata) { format_name = configuration.format; after_processing = engine_settings.after_processing.toString(); mode = engine_settings.mode.toString(); - tracked_files_limit = engine_settings.s3queue_tracked_files_limit; - tracked_file_ttl_sec = engine_settings.s3queue_tracked_file_ttl_sec; - buckets = engine_settings.s3queue_buckets; - processing_threads_num = engine_settings.s3queue_processing_threads_num; + tracked_files_limit = engine_settings.tracked_files_limit; + tracked_file_ttl_sec = engine_settings.tracked_file_ttl_sec; + buckets = engine_settings.buckets; + processing_threads_num = engine_settings.processing_threads_num; columns = storage_metadata.getColumns().toString(); } -String S3QueueTableMetadata::toString() const +String ObjectStorageQueueTableMetadata::toString() const { Poco::JSON::Object json; json.set("after_processing", after_processing); @@ -65,7 +65,7 @@ String S3QueueTableMetadata::toString() const return oss.str(); } -void S3QueueTableMetadata::read(const String & metadata_str) +void ObjectStorageQueueTableMetadata::read(const String & metadata_str) { Poco::JSON::Parser parser; auto json = parser.parse(metadata_str).extract(); @@ -102,19 +102,19 @@ void S3QueueTableMetadata::read(const String & metadata_str) buckets = json->getValue("buckets"); } -S3QueueTableMetadata S3QueueTableMetadata::parse(const String & metadata_str) +ObjectStorageQueueTableMetadata ObjectStorageQueueTableMetadata::parse(const String & metadata_str) { - S3QueueTableMetadata metadata; + ObjectStorageQueueTableMetadata metadata; metadata.read(metadata_str); return metadata; } -void S3QueueTableMetadata::checkEquals(const S3QueueTableMetadata & from_zk) const +void ObjectStorageQueueTableMetadata::checkEquals(const ObjectStorageQueueTableMetadata & from_zk) const { checkImmutableFieldsEquals(from_zk); } -void S3QueueTableMetadata::checkImmutableFieldsEquals(const S3QueueTableMetadata & from_zk) const +void ObjectStorageQueueTableMetadata::checkImmutableFieldsEquals(const ObjectStorageQueueTableMetadata & from_zk) const { if (after_processing != from_zk.after_processing) throw Exception( @@ -164,29 +164,29 @@ void S3QueueTableMetadata::checkImmutableFieldsEquals(const S3QueueTableMetadata from_zk.last_processed_path, last_processed_path); - if (modeFromString(mode) == S3QueueMode::ORDERED) + if (modeFromString(mode) == ObjectStorageQueueMode::ORDERED) { if (buckets != from_zk.buckets) { throw Exception( ErrorCodes::METADATA_MISMATCH, - "Existing table metadata in ZooKeeper differs in s3queue_buckets setting. " + "Existing table metadata in ZooKeeper differs in buckets setting. " "Stored in ZooKeeper: {}, local: {}", from_zk.buckets, buckets); } - if (S3QueueMetadata::getBucketsNum(*this) != S3QueueMetadata::getBucketsNum(from_zk)) + if (ObjectStorageQueueMetadata::getBucketsNum(*this) != ObjectStorageQueueMetadata::getBucketsNum(from_zk)) { throw Exception( ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in processing buckets. 
" "Stored in ZooKeeper: {}, local: {}", - S3QueueMetadata::getBucketsNum(*this), S3QueueMetadata::getBucketsNum(from_zk)); + ObjectStorageQueueMetadata::getBucketsNum(*this), ObjectStorageQueueMetadata::getBucketsNum(from_zk)); } } } -void S3QueueTableMetadata::checkEquals(const S3QueueSettings & current, const S3QueueSettings & expected) +void ObjectStorageQueueTableMetadata::checkEquals(const ObjectStorageQueueSettings & current, const ObjectStorageQueueSettings & expected) { if (current.after_processing != expected.after_processing) throw Exception( @@ -204,48 +204,48 @@ void S3QueueTableMetadata::checkEquals(const S3QueueSettings & current, const S3 expected.mode.toString(), current.mode.toString()); - if (current.s3queue_tracked_files_limit != expected.s3queue_tracked_files_limit) + if (current.tracked_files_limit != expected.tracked_files_limit) throw Exception( ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in max set size. " "Stored in ZooKeeper: {}, local: {}", - expected.s3queue_tracked_files_limit, - current.s3queue_tracked_files_limit); + expected.tracked_files_limit, + current.tracked_files_limit); - if (current.s3queue_tracked_file_ttl_sec != expected.s3queue_tracked_file_ttl_sec) + if (current.tracked_file_ttl_sec != expected.tracked_file_ttl_sec) throw Exception( ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in max set age. " "Stored in ZooKeeper: {}, local: {}", - expected.s3queue_tracked_file_ttl_sec, - current.s3queue_tracked_file_ttl_sec); + expected.tracked_file_ttl_sec, + current.tracked_file_ttl_sec); - if (current.s3queue_last_processed_path.value != expected.s3queue_last_processed_path.value) + if (current.last_processed_path.value != expected.last_processed_path.value) throw Exception( ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in last_processed_path. " "Stored in ZooKeeper: {}, local: {}", - expected.s3queue_last_processed_path.value, - current.s3queue_last_processed_path.value); + expected.last_processed_path.value, + current.last_processed_path.value); - if (current.mode == S3QueueMode::ORDERED) + if (current.mode == ObjectStorageQueueMode::ORDERED) { - if (current.s3queue_buckets != expected.s3queue_buckets) + if (current.buckets != expected.buckets) { throw Exception( ErrorCodes::METADATA_MISMATCH, - "Existing table metadata in ZooKeeper differs in s3queue_buckets setting. " + "Existing table metadata in ZooKeeper differs in buckets setting. " "Stored in ZooKeeper: {}, local: {}", - expected.s3queue_buckets, current.s3queue_buckets); + expected.buckets, current.buckets); } - if (S3QueueMetadata::getBucketsNum(current) != S3QueueMetadata::getBucketsNum(expected)) + if (ObjectStorageQueueMetadata::getBucketsNum(current) != ObjectStorageQueueMetadata::getBucketsNum(expected)) { throw Exception( ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in processing buckets. 
" "Stored in ZooKeeper: {}, local: {}", - S3QueueMetadata::getBucketsNum(current), S3QueueMetadata::getBucketsNum(expected)); + ObjectStorageQueueMetadata::getBucketsNum(current), ObjectStorageQueueMetadata::getBucketsNum(expected)); } } } diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.h similarity index 50% rename from src/Storages/S3Queue/S3QueueTableMetadata.h rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.h index d53b60570ae..bbae06b66c6 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include @@ -11,10 +11,10 @@ namespace DB class WriteBuffer; class ReadBuffer; -/** The basic parameters of S3Queue table engine for saving in ZooKeeper. +/** The basic parameters of ObjectStorageQueue table engine for saving in ZooKeeper. * Lets you verify that they match local ones. */ -struct S3QueueTableMetadata +struct ObjectStorageQueueTableMetadata { String format_name; String columns; @@ -26,22 +26,22 @@ struct S3QueueTableMetadata UInt64 processing_threads_num = 1; String last_processed_path; - S3QueueTableMetadata() = default; - S3QueueTableMetadata( + ObjectStorageQueueTableMetadata() = default; + ObjectStorageQueueTableMetadata( const StorageObjectStorage::Configuration & configuration, - const S3QueueSettings & engine_settings, + const ObjectStorageQueueSettings & engine_settings, const StorageInMemoryMetadata & storage_metadata); void read(const String & metadata_str); - static S3QueueTableMetadata parse(const String & metadata_str); + static ObjectStorageQueueTableMetadata parse(const String & metadata_str); String toString() const; - void checkEquals(const S3QueueTableMetadata & from_zk) const; - static void checkEquals(const S3QueueSettings & current, const S3QueueSettings & expected); + void checkEquals(const ObjectStorageQueueTableMetadata & from_zk) const; + static void checkEquals(const ObjectStorageQueueSettings & current, const ObjectStorageQueueSettings & expected); private: - void checkImmutableFieldsEquals(const S3QueueTableMetadata & from_zk) const; + void checkImmutableFieldsEquals(const ObjectStorageQueueTableMetadata & from_zk) const; }; diff --git a/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueUnorderedFileMetadata.cpp similarity index 88% rename from src/Storages/S3Queue/S3QueueUnorderedFileMetadata.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueUnorderedFileMetadata.cpp index c61e9557fc2..40751d9c332 100644 --- a/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueUnorderedFileMetadata.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -18,13 +18,13 @@ namespace } } -S3QueueUnorderedFileMetadata::S3QueueUnorderedFileMetadata( +ObjectStorageQueueUnorderedFileMetadata::ObjectStorageQueueUnorderedFileMetadata( const std::filesystem::path & zk_path, const std::string & path_, FileStatusPtr file_status_, size_t max_loading_retries_, LoggerPtr log_) - : S3QueueIFileMetadata( + : ObjectStorageQueueIFileMetadata( path_, /* processing_node_path */zk_path / "processing" / getNodeName(path_), /* processed_node_path */zk_path / "processed" / getNodeName(path_), @@ -35,7 +35,7 @@ S3QueueUnorderedFileMetadata::S3QueueUnorderedFileMetadata( { } -std::pair 
S3QueueUnorderedFileMetadata::setProcessingImpl() +std::pair ObjectStorageQueueUnorderedFileMetadata::setProcessingImpl() { /// In one zookeeper transaction do the following: enum RequestType @@ -89,7 +89,7 @@ std::pair S3QueueUnorderedFileMet throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected state of zookeeper transaction: {}", magic_enum::enum_name(code)); } -void S3QueueUnorderedFileMetadata::setProcessedAtStartRequests( +void ObjectStorageQueueUnorderedFileMetadata::setProcessedAtStartRequests( Coordination::Requests & requests, const zkutil::ZooKeeperPtr &) { @@ -98,7 +98,7 @@ void S3QueueUnorderedFileMetadata::setProcessedAtStartRequests( processed_node_path, node_metadata.toString(), zkutil::CreateMode::Persistent)); } -void S3QueueUnorderedFileMetadata::setProcessedImpl() +void ObjectStorageQueueUnorderedFileMetadata::setProcessedImpl() { /// In one zookeeper transaction do the following: enum RequestType @@ -130,8 +130,11 @@ void S3QueueUnorderedFileMetadata::setProcessedImpl() const auto code = zk_client->tryMulti(requests, responses); if (code == Coordination::Error::ZOK) { - if (max_loading_retries) - zk_client->tryRemove(failed_node_path + ".retriable", -1); + if (max_loading_retries + && zk_client->tryRemove(failed_node_path + ".retriable", -1) == Coordination::Error::ZOK) + { + LOG_TEST(log, "Removed node {}.retriable", failed_node_path); + } LOG_TRACE(log, "Moved file `{}` to processed (node path: {})", path, processed_node_path); return; diff --git a/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueUnorderedFileMetadata.h similarity index 75% rename from src/Storages/S3Queue/S3QueueUnorderedFileMetadata.h rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueUnorderedFileMetadata.h index 24c2765bf3a..cc5d8a09ec9 100644 --- a/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueUnorderedFileMetadata.h @@ -1,17 +1,17 @@ #pragma once -#include +#include #include #include namespace DB { -class S3QueueUnorderedFileMetadata : public S3QueueIFileMetadata +class ObjectStorageQueueUnorderedFileMetadata : public ObjectStorageQueueIFileMetadata { public: using Bucket = size_t; - explicit S3QueueUnorderedFileMetadata( + explicit ObjectStorageQueueUnorderedFileMetadata( const std::filesystem::path & zk_path, const std::string & path_, FileStatusPtr file_status_, diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp similarity index 55% rename from src/Storages/S3Queue/StorageS3Queue.cpp rename to src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index afb75a21b21..95265cde9ea 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -1,10 +1,7 @@ #include -#include "config.h" #include -#include #include -#include #include #include #include @@ -15,29 +12,23 @@ #include #include #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include #include #include -#include #include +#include #include #include namespace fs = std::filesystem; -namespace ProfileEvents -{ - extern const Event S3DeleteObjects; - extern const Event S3ListObjects; -} - namespace DB { @@ -45,23 +36,22 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; - extern const int S3_ERROR; extern const int QUERY_NOT_ALLOWED; } namespace { - std::string 
chooseZooKeeperPath(const StorageID & table_id, const Settings & settings, const S3QueueSettings & s3queue_settings) + std::string chooseZooKeeperPath(const StorageID & table_id, const Settings & settings, const ObjectStorageQueueSettings & queue_settings) { std::string zk_path_prefix = settings.s3queue_default_zookeeper_path.value; if (zk_path_prefix.empty()) zk_path_prefix = "/"; std::string result_zk_path; - if (s3queue_settings.keeper_path.changed) + if (queue_settings.keeper_path.changed) { /// We do not add table uuid here on purpose. - result_zk_path = fs::path(zk_path_prefix) / s3queue_settings.keeper_path.value; + result_zk_path = fs::path(zk_path_prefix) / queue_settings.keeper_path.value; } else { @@ -71,35 +61,67 @@ namespace return zkutil::extractZooKeeperPath(result_zk_path, true); } - void checkAndAdjustSettings(S3QueueSettings & s3queue_settings, const Settings & settings, bool is_attach) + void checkAndAdjustSettings( + ObjectStorageQueueSettings & queue_settings, + ASTStorage * engine_args, + bool is_attach, + const LoggerPtr & log) { - if (!is_attach && !s3queue_settings.mode.changed) + if (!is_attach && !queue_settings.mode.changed) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Setting `mode` (Unordered/Ordered) is not specified, but is required."); } /// In case !is_attach, we leave Ordered mode as default for compatibility. - if (!s3queue_settings.s3queue_processing_threads_num) + if (!queue_settings.processing_threads_num) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Setting `s3queue_processing_threads_num` cannot be set to zero"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Setting `processing_threads_num` cannot be set to zero"); } - if (!s3queue_settings.s3queue_enable_logging_to_s3queue_log.changed) - { - s3queue_settings.s3queue_enable_logging_to_s3queue_log = settings.s3queue_enable_logging_to_s3queue_log; - } - - if (s3queue_settings.s3queue_cleanup_interval_min_ms > s3queue_settings.s3queue_cleanup_interval_max_ms) + if (queue_settings.cleanup_interval_min_ms > queue_settings.cleanup_interval_max_ms) { throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Setting `s3queue_cleanup_interval_min_ms` ({}) must be less or equal to `s3queue_cleanup_interval_max_ms` ({})", - s3queue_settings.s3queue_cleanup_interval_min_ms, s3queue_settings.s3queue_cleanup_interval_max_ms); + "Setting `cleanup_interval_min_ms` ({}) must be less or equal to `cleanup_interval_max_ms` ({})", + queue_settings.cleanup_interval_min_ms, queue_settings.cleanup_interval_max_ms); } + + if (!is_attach && !queue_settings.processing_threads_num.changed) + { + queue_settings.processing_threads_num = std::max(getNumberOfPhysicalCPUCores(), 16); + engine_args->settings->as()->changes.insertSetting( + "processing_threads_num", + queue_settings.processing_threads_num.value); + + LOG_TRACE(log, "Set `processing_threads_num` to {}", queue_settings.processing_threads_num); + } + } + + std::shared_ptr getQueueLog(const ObjectStoragePtr & storage, const ContextPtr & context, const ObjectStorageQueueSettings & table_settings) + { + const auto & settings = context->getSettingsRef(); + switch (storage->getType()) + { + case DB::ObjectStorageType::S3: + { + if (table_settings.enable_logging_to_queue_log || settings.s3queue_enable_logging_to_s3queue_log) + return context->getS3QueueLog(); + return nullptr; + } + case DB::ObjectStorageType::Azure: + { + if (table_settings.enable_logging_to_queue_log) + return context->getAzureQueueLog(); + return nullptr; + } + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, 
"Unexpected object storage type: {}", storage->getType()); + } + } } -StorageS3Queue::StorageS3Queue( - std::unique_ptr s3queue_settings_, +StorageObjectStorageQueue::StorageObjectStorageQueue( + std::unique_ptr queue_settings_, const ConfigurationPtr configuration_, const StorageID & table_id_, const ColumnsDescription & columns_, @@ -107,16 +129,16 @@ StorageS3Queue::StorageS3Queue( const String & comment, ContextPtr context_, std::optional format_settings_, - ASTStorage * /* engine_args */, + ASTStorage * engine_args, LoadingStrictnessLevel mode) : IStorage(table_id_) , WithContext(context_) - , s3queue_settings(std::move(s3queue_settings_)) - , zk_path(chooseZooKeeperPath(table_id_, context_->getSettingsRef(), *s3queue_settings)) + , queue_settings(std::move(queue_settings_)) + , zk_path(chooseZooKeeperPath(table_id_, context_->getSettingsRef(), *queue_settings)) , configuration{configuration_} , format_settings(format_settings_) - , reschedule_processing_interval_ms(s3queue_settings->s3queue_polling_min_timeout_ms) - , log(getLogger("StorageS3Queue (" + table_id_.getFullTableName() + ")")) + , reschedule_processing_interval_ms(queue_settings->polling_min_timeout_ms) + , log(getLogger(fmt::format("Storage{}Queue ({})", configuration->getEngineName(), table_id_.getFullTableName()))) { if (configuration->getPath().empty()) { @@ -128,10 +150,10 @@ StorageS3Queue::StorageS3Queue( } else if (!configuration->isPathWithGlobs()) { - throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "S3Queue url must either end with '/' or contain globs"); + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "ObjectStorageQueue url must either end with '/' or contain globs"); } - checkAndAdjustSettings(*s3queue_settings, context_->getSettingsRef(), mode > LoadingStrictnessLevel::CREATE); + checkAndAdjustSettings(*queue_settings, engine_args, mode > LoadingStrictnessLevel::CREATE, log); object_storage = configuration->createObjectStorage(context_, /* is_readonly */true); FormatFactory::instance().checkFormatName(configuration->format); @@ -149,30 +171,30 @@ StorageS3Queue::StorageS3Queue( setInMemoryMetadata(storage_metadata); LOG_INFO(log, "Using zookeeper path: {}", zk_path.string()); - task = getContext()->getSchedulePool().createTask("S3QueueStreamingTask", [this] { threadFunc(); }); + task = getContext()->getSchedulePool().createTask("ObjectStorageQueueStreamingTask", [this] { threadFunc(); }); - /// Get metadata manager from S3QueueMetadataFactory, + /// Get metadata manager from ObjectStorageQueueMetadataFactory, /// it will increase the ref count for the metadata object. - /// The ref count is decreased when StorageS3Queue::drop() method is called. - files_metadata = S3QueueMetadataFactory::instance().getOrCreate(zk_path, *s3queue_settings); + /// The ref count is decreased when StorageObjectStorageQueue::drop() method is called. + files_metadata = ObjectStorageQueueMetadataFactory::instance().getOrCreate(zk_path, *queue_settings); try { files_metadata->initialize(configuration_, storage_metadata); } catch (...) 
{ - S3QueueMetadataFactory::instance().remove(zk_path); + ObjectStorageQueueMetadataFactory::instance().remove(zk_path); throw; } } -void StorageS3Queue::startup() +void StorageObjectStorageQueue::startup() { if (task) task->activateAndSchedule(); } -void StorageS3Queue::shutdown(bool is_drop) +void StorageObjectStorageQueue::shutdown(bool is_drop) { table_is_being_dropped = is_drop; shutdown_called = true; @@ -191,31 +213,31 @@ void StorageS3Queue::shutdown(bool is_drop) LOG_TRACE(log, "Shut down storage"); } -void StorageS3Queue::drop() +void StorageObjectStorageQueue::drop() { - S3QueueMetadataFactory::instance().remove(zk_path); + ObjectStorageQueueMetadataFactory::instance().remove(zk_path); } -bool StorageS3Queue::supportsSubsetOfColumns(const ContextPtr & context_) const +bool StorageObjectStorageQueue::supportsSubsetOfColumns(const ContextPtr & context_) const { return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context_, format_settings); } -class ReadFromS3Queue : public SourceStepWithFilter +class ReadFromObjectStorageQueue : public SourceStepWithFilter { public: - std::string getName() const override { return "ReadFromS3Queue"; } + std::string getName() const override { return "ReadFromObjectStorageQueue"; } void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; void applyFilters(ActionDAGNodes added_filter_nodes) override; - ReadFromS3Queue( + ReadFromObjectStorageQueue( const Names & column_names_, const SelectQueryInfo & query_info_, const StorageSnapshotPtr & storage_snapshot_, const ContextPtr & context_, Block sample_block, ReadFromFormatInfo info_, - std::shared_ptr storage_, + std::shared_ptr storage_, size_t max_block_size_) : SourceStepWithFilter( DataStream{.header = std::move(sample_block)}, @@ -231,15 +253,15 @@ public: private: ReadFromFormatInfo info; - std::shared_ptr storage; + std::shared_ptr storage; size_t max_block_size; - std::shared_ptr iterator; + std::shared_ptr iterator; void createIterator(const ActionsDAG::Node * predicate); }; -void ReadFromS3Queue::createIterator(const ActionsDAG::Node * predicate) +void ReadFromObjectStorageQueue::createIterator(const ActionsDAG::Node * predicate) { if (iterator) return; @@ -248,7 +270,7 @@ void ReadFromS3Queue::createIterator(const ActionsDAG::Node * predicate) } -void ReadFromS3Queue::applyFilters(ActionDAGNodes added_filter_nodes) +void ReadFromObjectStorageQueue::applyFilters(ActionDAGNodes added_filter_nodes) { SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); @@ -259,7 +281,7 @@ void ReadFromS3Queue::applyFilters(ActionDAGNodes added_filter_nodes) createIterator(predicate); } -void StorageS3Queue::read( +void StorageObjectStorageQueue::read( QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, @@ -281,10 +303,10 @@ void StorageS3Queue::read( "Cannot read from {} with attached materialized views", getName()); } - auto this_ptr = std::static_pointer_cast(shared_from_this()); + auto this_ptr = std::static_pointer_cast(shared_from_this()); auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); - auto reading = std::make_unique( + auto reading = std::make_unique( column_names, query_info, storage_snapshot, @@ -297,18 +319,20 @@ void StorageS3Queue::read( query_plan.addStep(std::move(reading)); } -void ReadFromS3Queue::initializePipeline(QueryPipelineBuilder & pipeline, const 
BuildQueryPipelineSettings &) +void ReadFromObjectStorageQueue::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { Pipes pipes; - const size_t adjusted_num_streams = storage->s3queue_settings->s3queue_processing_threads_num; + const size_t adjusted_num_streams = storage->queue_settings->processing_threads_num; createIterator(nullptr); for (size_t i = 0; i < adjusted_num_streams; ++i) pipes.emplace_back(storage->createSource( - i, + i/* processor_id */, info, iterator, - max_block_size, context)); + max_block_size, + context, + true/* commit_once_processed */)); auto pipe = Pipe::unitePipes(std::move(pipes)); if (pipe.empty()) @@ -320,12 +344,13 @@ void ReadFromS3Queue::initializePipeline(QueryPipelineBuilder & pipeline, const pipeline.init(std::move(pipe)); } -std::shared_ptr StorageS3Queue::createSource( +std::shared_ptr StorageObjectStorageQueue::createSource( size_t processor_id, const ReadFromFormatInfo & info, - std::shared_ptr file_iterator, + std::shared_ptr file_iterator, size_t max_block_size, - ContextPtr local_context) + ContextPtr local_context, + bool commit_once_processed) { auto internal_source = std::make_unique( getName(), @@ -343,25 +368,30 @@ std::shared_ptr StorageS3Queue::createSource( { object_storage->removeObject(StoredObject(path)); }; - auto s3_queue_log = s3queue_settings->s3queue_enable_logging_to_s3queue_log ? local_context->getS3QueueLog() : nullptr; - return std::make_shared( + + return std::make_shared( getName(), processor_id, info.source_header, std::move(internal_source), files_metadata, - s3queue_settings->after_processing, + queue_settings->after_processing, file_deleter, info.requested_virtual_columns, local_context, shutdown_called, table_is_being_dropped, - s3_queue_log, + getQueueLog(object_storage, local_context, *queue_settings), getStorageID(), - log); + log, + queue_settings->max_processed_files_before_commit, + queue_settings->max_processed_rows_before_commit, + queue_settings->max_processed_bytes_before_commit, + queue_settings->max_processing_time_sec_before_commit, + commit_once_processed); } -bool StorageS3Queue::hasDependencies(const StorageID & table_id) +bool StorageObjectStorageQueue::hasDependencies(const StorageID & table_id) { // Check if all dependencies are attached auto view_ids = DatabaseCatalog::instance().getDependentViews(table_id); @@ -386,7 +416,7 @@ bool StorageS3Queue::hasDependencies(const StorageID & table_id) return true; } -void StorageS3Queue::threadFunc() +void StorageObjectStorageQueue::threadFunc() { if (shutdown_called) return; @@ -404,12 +434,12 @@ void StorageS3Queue::threadFunc() if (streamToViews()) { /// Reset the reschedule interval. - reschedule_processing_interval_ms = s3queue_settings->s3queue_polling_min_timeout_ms; + reschedule_processing_interval_ms = queue_settings->polling_min_timeout_ms; } else { /// Increase the reschedule interval. 
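threadFunc() above resets the reschedule interval to polling_min_timeout_ms after a batch that produced rows and grows it by polling_backoff_ms while idle. A small illustrative helper; the clamp to polling_max_timeout_ms is an assumption based on that setting's name and is not shown in this hunk:

#include <algorithm>
#include <cstdint>

struct PollingSettings
{
    uint64_t polling_min_timeout_ms = 1000;
    uint64_t polling_max_timeout_ms = 10000;
    uint64_t polling_backoff_ms = 1000;
};

uint64_t nextRescheduleIntervalMs(bool streamed_rows, uint64_t current_ms, const PollingSettings & s)
{
    if (streamed_rows)
        return s.polling_min_timeout_ms;                    // reset after useful work
    return std::min(current_ms + s.polling_backoff_ms,      // back off while idle,
                    s.polling_max_timeout_ms);              // capped (assumed) at the configured max
}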
- reschedule_processing_interval_ms += s3queue_settings->s3queue_polling_backoff_ms; + reschedule_processing_interval_ms += queue_settings->polling_backoff_ms; } LOG_DEBUG(log, "Stopped streaming to {} attached views", dependencies_count); @@ -426,63 +456,98 @@ void StorageS3Queue::threadFunc() if (!shutdown_called) { - LOG_TRACE(log, "Reschedule S3 Queue processing thread in {} ms", reschedule_processing_interval_ms); + LOG_TRACE(log, "Reschedule processing thread in {} ms", reschedule_processing_interval_ms); task->scheduleAfter(reschedule_processing_interval_ms); } } -bool StorageS3Queue::streamToViews() +bool StorageObjectStorageQueue::streamToViews() { + // Create a stream for each consumer and join them in a union stream + // Only insert into dependent views and expect that input blocks contain virtual columns + auto table_id = getStorageID(); auto table = DatabaseCatalog::instance().getTable(table_id, getContext()); if (!table) throw Exception(ErrorCodes::LOGICAL_ERROR, "Engine table {} doesn't exist.", table_id.getNameForLogs()); - auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); - - // Create an INSERT query for streaming data auto insert = std::make_shared(); insert->table_id = table_id; - auto s3queue_context = Context::createCopy(getContext()); - s3queue_context->makeQueryContext(); + auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); + auto queue_context = Context::createCopy(getContext()); + queue_context->makeQueryContext(); - // Create a stream for each consumer and join them in a union stream - // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, s3queue_context, false, true, true); - auto block_io = interpreter.execute(); - auto file_iterator = createFileIterator(s3queue_context, nullptr); + auto file_iterator = createFileIterator(queue_context, nullptr); + size_t total_rows = 0; - auto read_from_format_info = prepareReadingFromFormat(block_io.pipeline.getHeader().getNames(), storage_snapshot, supportsSubsetOfColumns(s3queue_context)); - - Pipes pipes; - pipes.reserve(s3queue_settings->s3queue_processing_threads_num); - for (size_t i = 0; i < s3queue_settings->s3queue_processing_threads_num; ++i) + while (!shutdown_called && !file_iterator->isFinished()) { - auto source = createSource(i, read_from_format_info, file_iterator, DBMS_DEFAULT_BUFFER_SIZE, s3queue_context); - pipes.emplace_back(std::move(source)); + InterpreterInsertQuery interpreter(insert, queue_context, false, true, true); + auto block_io = interpreter.execute(); + auto read_from_format_info = prepareReadingFromFormat( + block_io.pipeline.getHeader().getNames(), + storage_snapshot, + supportsSubsetOfColumns(queue_context)); + + Pipes pipes; + std::vector> sources; + + pipes.reserve(queue_settings->processing_threads_num); + sources.reserve(queue_settings->processing_threads_num); + + for (size_t i = 0; i < queue_settings->processing_threads_num; ++i) + { + auto source = createSource( + i/* processor_id */, + read_from_format_info, + file_iterator, + DBMS_DEFAULT_BUFFER_SIZE, + queue_context, + false/* commit_once_processed */); + + pipes.emplace_back(source); + sources.emplace_back(source); + } + auto pipe = Pipe::unitePipes(std::move(pipes)); + + block_io.pipeline.complete(std::move(pipe)); + block_io.pipeline.setNumThreads(queue_settings->processing_threads_num); + 
block_io.pipeline.setConcurrencyControl(queue_context->getSettingsRef().use_concurrency_control); + + std::atomic_size_t rows = 0; + block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); }); + + try + { + CompletedPipelineExecutor executor(block_io.pipeline); + executor.execute(); + } + catch (...) + { + for (auto & source : sources) + source->commit(/* success */false, getCurrentExceptionMessage(true)); + + file_iterator->releaseFinishedBuckets(); + throw; + } + + for (auto & source : sources) + source->commit(/* success */true); + + file_iterator->releaseFinishedBuckets(); + total_rows += rows; } - auto pipe = Pipe::unitePipes(std::move(pipes)); - block_io.pipeline.complete(std::move(pipe)); - block_io.pipeline.setNumThreads(s3queue_settings->s3queue_processing_threads_num); - block_io.pipeline.setConcurrencyControl(s3queue_context->getSettingsRef().use_concurrency_control); - - std::atomic_size_t rows = 0; - block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); }); - - CompletedPipelineExecutor executor(block_io.pipeline); - executor.execute(); - - return rows > 0; + return total_rows > 0; } -zkutil::ZooKeeperPtr StorageS3Queue::getZooKeeper() const +zkutil::ZooKeeperPtr StorageObjectStorageQueue::getZooKeeper() const { return getContext()->getZooKeeper(); } -std::shared_ptr StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate) +std::shared_ptr StorageObjectStorageQueue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate) { auto settings = configuration->getQuerySettings(local_context); auto glob_iterator = std::make_unique( @@ -491,73 +556,4 @@ std::shared_ptr StorageS3Queue::createFileIterator return std::make_shared(files_metadata, std::move(glob_iterator), shutdown_called, log); } -#if USE_AWS_S3 -void registerStorageS3Queue(StorageFactory & factory) -{ - factory.registerStorage( - "S3Queue", - [](const StorageFactory::Arguments & args) - { - auto & engine_args = args.engine_args; - if (engine_args.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - - auto configuration = std::make_shared(); - StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getContext(), false); - - // Use format settings from global server context + settings from - // the SETTINGS clause of the create query. Settings from current - // session and user are ignored. - std::optional format_settings; - - auto s3queue_settings = std::make_unique(); - if (args.storage_def->settings) - { - s3queue_settings->loadFromQuery(*args.storage_def); - FormatFactorySettings user_format_settings; - - // Apply changed settings from global context, but ignore the - // unknown ones, because we only have the format settings here. 
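The new streamToViews() executes one insert pipeline per batch and then commits every source: on an exception each source is committed as failed (and finished buckets are released) before rethrowing; otherwise all sources are committed as processed. A stripped-down sketch of that pattern with a stand-in Source type:

#include <functional>
#include <memory>
#include <string>
#include <vector>

struct Source
{
    void commit(bool success, const std::string & error = {}) { (void)success; (void)error; }
};

void runBatch(std::vector<std::shared_ptr<Source>> & sources, const std::function<void()> & execute)
{
    try
    {
        execute();                       // run the insert pipeline for this batch
    }
    catch (...)
    {
        for (auto & source : sources)    // mark everything in the batch as failed
            source->commit(false, "insertion failed");
        throw;                           // the real code also releases finished buckets here
    }

    for (auto & source : sources)        // batch inserted successfully: mark files processed
        source->commit(true);
}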
- const auto & changes = args.getContext()->getSettingsRef().changes(); - for (const auto & change : changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.set(change.name, change.value); - else - LOG_TRACE(getLogger("StorageS3"), "Remove: {}", change.name); - args.storage_def->settings->changes.removeSetting(change.name); - } - - for (const auto & change : args.storage_def->settings->changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.applyChange(change); - } - format_settings = getFormatSettings(args.getContext(), user_format_settings); - } - else - { - format_settings = getFormatSettings(args.getContext()); - } - - return std::make_shared( - std::move(s3queue_settings), - std::move(configuration), - args.table_id, - args.columns, - args.constraints, - args.comment, - args.getContext(), - format_settings, - args.storage_def, - args.mode); - }, - { - .supports_settings = true, - .supports_schema_inference = true, - .source_access_type = AccessType::S3, - }); -} -#endif - } diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.h similarity index 73% rename from src/Storages/S3Queue/StorageS3Queue.h rename to src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.h index ef83a1ccc25..758721674fe 100644 --- a/src/Storages/S3Queue/StorageS3Queue.h +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.h @@ -5,25 +5,24 @@ #include #include #include -#include -#include +#include +#include #include #include -#include #include namespace DB { -class S3QueueMetadata; +class ObjectStorageQueueMetadata; -class StorageS3Queue : public IStorage, WithContext +class StorageObjectStorageQueue : public IStorage, WithContext { public: using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; - StorageS3Queue( - std::unique_ptr s3queue_settings_, + StorageObjectStorageQueue( + std::unique_ptr queue_settings_, ConfigurationPtr configuration_, const StorageID & table_id_, const ColumnsDescription & columns_, @@ -34,7 +33,7 @@ public: ASTStorage * engine_args, LoadingStrictnessLevel mode); - String getName() const override { return "S3Queue"; } + String getName() const override { return "ObjectStorageQueue"; } void read( QueryPlan & query_plan, @@ -53,13 +52,13 @@ public: zkutil::ZooKeeperPtr getZooKeeper() const; private: - friend class ReadFromS3Queue; - using FileIterator = StorageS3QueueSource::FileIterator; + friend class ReadFromObjectStorageQueue; + using FileIterator = ObjectStorageQueueSource::FileIterator; - const std::unique_ptr s3queue_settings; + const std::unique_ptr queue_settings; const fs::path zk_path; - std::shared_ptr files_metadata; + std::shared_ptr files_metadata; ConfigurationPtr configuration; ObjectStoragePtr object_storage; @@ -83,12 +82,13 @@ private: bool supportsDynamicSubcolumns() const override { return true; } std::shared_ptr createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate); - std::shared_ptr createSource( + std::shared_ptr createSource( size_t processor_id, const ReadFromFormatInfo & info, - std::shared_ptr file_iterator, + std::shared_ptr file_iterator, size_t max_block_size, - ContextPtr local_context); + ContextPtr local_context, + bool commit_once_processed); bool hasDependencies(const StorageID & table_id); bool streamToViews(); diff --git a/src/Storages/ObjectStorageQueue/registerQueueStorage.cpp b/src/Storages/ObjectStorageQueue/registerQueueStorage.cpp new file mode 100644 index 00000000000..20968143627 --- 
/dev/null +++ b/src/Storages/ObjectStorageQueue/registerQueueStorage.cpp @@ -0,0 +1,115 @@ +#include "config.h" + +#include +#include +#include +#include + +#if USE_AWS_S3 +#include +#include +#endif + +#if USE_AZURE_BLOB_STORAGE +#include +#endif + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +template +StoragePtr createQueueStorage(const StorageFactory::Arguments & args) +{ + auto & engine_args = args.engine_args; + if (engine_args.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); + + auto configuration = std::make_shared(); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getContext(), false); + + // Use format settings from global server context + settings from + // the SETTINGS clause of the create query. Settings from current + // session and user are ignored. + std::optional format_settings; + + auto queue_settings = std::make_unique(); + if (args.storage_def->settings) + { + queue_settings->loadFromQuery(*args.storage_def); + FormatFactorySettings user_format_settings; + + // Apply changed settings from global context, but ignore the + // unknown ones, because we only have the format settings here. + const auto & changes = args.getContext()->getSettingsRef().changes(); + for (const auto & change : changes) + { + if (user_format_settings.has(change.name)) + user_format_settings.set(change.name, change.value); + + args.storage_def->settings->changes.removeSetting(change.name); + } + + for (const auto & change : args.storage_def->settings->changes) + { + if (user_format_settings.has(change.name)) + user_format_settings.applyChange(change); + } + format_settings = getFormatSettings(args.getContext(), user_format_settings); + } + else + { + format_settings = getFormatSettings(args.getContext()); + } + + return std::make_shared( + std::move(queue_settings), + std::move(configuration), + args.table_id, + args.columns, + args.constraints, + args.comment, + args.getContext(), + format_settings, + args.storage_def, + args.mode); +} + +#if USE_AWS_S3 +void registerStorageS3Queue(StorageFactory & factory) +{ + factory.registerStorage( + "S3Queue", + [](const StorageFactory::Arguments & args) + { + return createQueueStorage(args); + }, + { + .supports_settings = true, + .supports_schema_inference = true, + .source_access_type = AccessType::S3, + }); +} +#endif + +#if USE_AZURE_BLOB_STORAGE +void registerStorageAzureQueue(StorageFactory & factory) +{ + factory.registerStorage( + "AzureQueue", + [](const StorageFactory::Arguments & args) + { + return createQueueStorage(args); + }, + { + .supports_settings = true, + .supports_schema_inference = true, + .source_access_type = AccessType::AZURE, + }); +} +#endif +} diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index f5d869cdea0..9654b4ef37a 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -16,7 +16,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -306,7 +307,9 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context) builder.resize(1); // Generate aggregated blocks with rows less or equal than the original block. // There should be only one output block after this transformation. 
- builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); + + builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); + builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); PullingPipelineExecutor executor(pipeline); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp index 0baa234e7a3..31812406d34 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp @@ -26,6 +26,7 @@ #include #include #include +#include namespace DB @@ -167,7 +168,7 @@ std::pair EmbeddedRocksDBBulkSink::seriali auto & serialized_value_offsets = serialized_value_column->getOffsets(); WriteBufferFromVector writer_key(serialized_key_data); WriteBufferFromVector writer_value(serialized_value_data); - + FormatSettings format_settings; /// Format settings is 1.5KB, so it's not wise to create it for each row for (auto && chunk : input_chunks) { const auto & columns = chunk.getColumns(); @@ -175,7 +176,7 @@ std::pair EmbeddedRocksDBBulkSink::seriali for (size_t i = 0; i < rows; ++i) { for (size_t idx = 0; idx < columns.size(); ++idx) - serializations[idx]->serializeBinary(*columns[idx], i, idx == primary_key_pos ? writer_key : writer_value, {}); + serializations[idx]->serializeBinary(*columns[idx], i, idx == primary_key_pos ? writer_key : writer_value, format_settings); /// String in ColumnString must be null-terminated writeChar('\0', writer_key); writeChar('\0', writer_value); diff --git a/src/Storages/S3Queue/S3QueueMetadataFactory.h b/src/Storages/S3Queue/S3QueueMetadataFactory.h deleted file mode 100644 index 80e96f8aa7e..00000000000 --- a/src/Storages/S3Queue/S3QueueMetadataFactory.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once -#include -#include -#include - -namespace DB -{ - -class S3QueueMetadataFactory final : private boost::noncopyable -{ -public: - using FilesMetadataPtr = std::shared_ptr; - - static S3QueueMetadataFactory & instance(); - - FilesMetadataPtr getOrCreate(const std::string & zookeeper_path, const S3QueueSettings & settings); - - void remove(const std::string & zookeeper_path); - - std::unordered_map getAll(); - -private: - struct Metadata - { - explicit Metadata(std::shared_ptr metadata_) : metadata(metadata_), ref_count(1) {} - - std::shared_ptr metadata; - /// TODO: the ref count should be kept in keeper, because of the case with distributed processing. - size_t ref_count = 0; - }; - using MetadataByPath = std::unordered_map; - - MetadataByPath metadata_by_path; - std::mutex mutex; -}; - -} diff --git a/src/Storages/S3Queue/S3QueueSettings.h b/src/Storages/S3Queue/S3QueueSettings.h deleted file mode 100644 index 4a92d99c411..00000000000 --- a/src/Storages/S3Queue/S3QueueSettings.h +++ /dev/null @@ -1,47 +0,0 @@ -#pragma once - -#include -#include -#include - - -namespace DB -{ -class ASTStorage; - - -#define S3QUEUE_RELATED_SETTINGS(M, ALIAS) \ - M(S3QueueMode, \ - mode, \ - S3QueueMode::ORDERED, \ - "With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKepeer." 
\ - "With ordered mode, only the max name of the successfully consumed file stored.", \ - 0) \ - M(S3QueueAction, after_processing, S3QueueAction::KEEP, "Delete or keep file in S3 after successful processing", 0) \ - M(String, keeper_path, "", "Zookeeper node path", 0) \ - M(UInt32, s3queue_loading_retries, 0, "Retry loading up to specified number of times", 0) \ - M(UInt32, s3queue_processing_threads_num, 1, "Number of processing threads", 0) \ - M(UInt32, s3queue_enable_logging_to_s3queue_log, 1, "Enable logging to system table system.s3queue_log", 0) \ - M(String, s3queue_last_processed_path, "", "For Ordered mode. Files that have lexicographically smaller file name are considered already processed", 0) \ - M(UInt32, s3queue_tracked_file_ttl_sec, 0, "Maximum number of seconds to store processed files in ZooKeeper node (store forever by default)", 0) \ - M(UInt32, s3queue_polling_min_timeout_ms, 1000, "Minimal timeout before next polling", 0) \ - M(UInt32, s3queue_polling_max_timeout_ms, 10000, "Maximum timeout before next polling", 0) \ - M(UInt32, s3queue_polling_backoff_ms, 1000, "Polling backoff", 0) \ - M(UInt32, s3queue_tracked_files_limit, 1000, "For unordered mode. Max set size for tracking processed files in ZooKeeper", 0) \ - M(UInt32, s3queue_cleanup_interval_min_ms, 60000, "For unordered mode. Polling backoff min for cleanup", 0) \ - M(UInt32, s3queue_cleanup_interval_max_ms, 60000, "For unordered mode. Polling backoff max for cleanup", 0) \ - M(UInt32, s3queue_buckets, 0, "Number of buckets for Ordered mode parallel processing", 0) \ - -#define LIST_OF_S3QUEUE_SETTINGS(M, ALIAS) \ - S3QUEUE_RELATED_SETTINGS(M, ALIAS) \ - LIST_OF_ALL_FORMAT_SETTINGS(M, ALIAS) - -DECLARE_SETTINGS_TRAITS(S3QueueSettingsTraits, LIST_OF_S3QUEUE_SETTINGS) - - -struct S3QueueSettings : public BaseSettings -{ - void loadFromQuery(ASTStorage & storage_def); -}; - -} diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h deleted file mode 100644 index 6e098f8cb63..00000000000 --- a/src/Storages/S3Queue/S3QueueSource.h +++ /dev/null @@ -1,132 +0,0 @@ -#pragma once -#include "config.h" - -#include -#include -#include -#include -#include -#include - - -namespace Poco { class Logger; } - -namespace DB -{ - -struct ObjectMetadata; - -class StorageS3QueueSource : public ISource, WithContext -{ -public: - using Storage = StorageObjectStorage; - using ConfigurationPtr = Storage::ConfigurationPtr; - using GlobIterator = StorageObjectStorageSource::GlobIterator; - using ZooKeeperGetter = std::function; - using RemoveFileFunc = std::function; - using FileStatusPtr = S3QueueMetadata::FileStatusPtr; - using ReaderHolder = StorageObjectStorageSource::ReaderHolder; - using Metadata = S3QueueMetadata; - using ObjectInfo = StorageObjectStorageSource::ObjectInfo; - using ObjectInfoPtr = std::shared_ptr; - using ObjectInfos = std::vector; - - struct S3QueueObjectInfo : public ObjectInfo - { - S3QueueObjectInfo( - const ObjectInfo & object_info, - Metadata::FileMetadataPtr processing_holder_); - - Metadata::FileMetadataPtr processing_holder; - }; - - class FileIterator : public StorageObjectStorageSource::IIterator - { - public: - FileIterator( - std::shared_ptr metadata_, - std::unique_ptr glob_iterator_, - std::atomic & shutdown_called_, - LoggerPtr logger_); - - /// Note: - /// List results in s3 are always returned in UTF-8 binary order. 
- /// (https://docs.aws.amazon.com/AmazonS3/latest/userguide/ListingKeysUsingAPIs.html) - ObjectInfoPtr nextImpl(size_t processor) override; - - size_t estimatedKeysCount() override; - - private: - using Bucket = S3QueueMetadata::Bucket; - using Processor = S3QueueMetadata::Processor; - - const std::shared_ptr metadata; - const std::unique_ptr glob_iterator; - - std::atomic & shutdown_called; - std::mutex mutex; - LoggerPtr log; - - std::mutex buckets_mutex; - struct ListedKeys - { - std::deque keys; - std::optional processor; - }; - std::unordered_map listed_keys_cache; - bool iterator_finished = false; - std::unordered_map bucket_holders; - - std::pair getNextKeyFromAcquiredBucket(size_t processor); - }; - - StorageS3QueueSource( - String name_, - size_t processor_id_, - const Block & header_, - std::unique_ptr internal_source_, - std::shared_ptr files_metadata_, - const S3QueueAction & action_, - RemoveFileFunc remove_file_func_, - const NamesAndTypesList & requested_virtual_columns_, - ContextPtr context_, - const std::atomic & shutdown_called_, - const std::atomic & table_is_being_dropped_, - std::shared_ptr s3_queue_log_, - const StorageID & storage_id_, - LoggerPtr log_); - - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); - - String getName() const override; - - Chunk generate() override; - -private: - const String name; - const size_t processor_id; - const S3QueueAction action; - const std::shared_ptr files_metadata; - const std::shared_ptr internal_source; - const NamesAndTypesList requested_virtual_columns; - const std::atomic & shutdown_called; - const std::atomic & table_is_being_dropped; - const std::shared_ptr s3_queue_log; - const StorageID storage_id; - - RemoveFileFunc remove_file_func; - LoggerPtr log; - - ReaderHolder reader; - std::future reader_future; - std::atomic initialized{false}; - size_t processed_rows_from_file = 0; - - S3QueueOrderedFileMetadata::BucketHolderPtr current_bucket_holder; - - void applyActionAfterProcessing(const String & path); - void appendLogElement(const std::string & filename, S3QueueMetadata::FileStatus & file_status_, size_t processed_rows, bool processed); - void lazyInitialize(size_t processor); -}; - -} diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 52b6674c93d..bdf69b9be15 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -140,6 +140,9 @@ class IMergeTreeDataPart; using ManyExpressionActions = std::vector; +struct StorageSnapshot; +using StorageSnapshotPtr = std::shared_ptr; + /** Query along with some additional data, * that can be used during query processing * inside storage engines. @@ -173,6 +176,13 @@ struct SelectQueryInfo /// Local storage limits StorageLimits local_storage_limits; + /// This is a leak of abstraction. + /// StorageMerge replaces storage into query_tree. However, column types may be changed for inner table. + /// So, resolved query tree might have incompatible types. + /// StorageDistributed uses this query tree to calculate a header, throws if we use storage snapshot. + /// To avoid this, we use initial merge_storage_snapshot. + StorageSnapshotPtr merge_storage_snapshot; + /// Cluster for the query. ClusterPtr cluster; /// Optimized cluster for the query. @@ -219,8 +229,8 @@ struct SelectQueryInfo bool is_parameterized_view = false; bool optimize_trivial_count = false; - // If limit is not 0, that means it's a trivial limit query. 
- UInt64 limit = 0; + // If not 0, that means it's a trivial limit query. + UInt64 trivial_limit = 0; /// For IStorageSystemOneBlock std::vector columns_mask; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 5048ef4788e..849fa5dbe0b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -846,7 +846,7 @@ void StorageDistributed::read( remote_storage_id = StorageID{remote_database, remote_table}; auto query_tree_distributed = buildQueryTreeDistributed(modified_query_info, - storage_snapshot, + query_info.merge_storage_snapshot ? query_info.merge_storage_snapshot : storage_snapshot, remote_storage_id, remote_table_function_ptr); header = InterpreterSelectQueryAnalyzer::getSampleBlock(query_tree_distributed, local_context, SelectQueryOptions(processed_stage).analyze()); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 16c248f1b7b..7f39ff615f0 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1789,7 +1789,8 @@ public: void onCancel() override { std::lock_guard cancel_lock(cancel_mutex); - finalize(); + cancelBuffers(); + releaseBuffers(); cancelled = true; } @@ -1803,18 +1804,18 @@ public: catch (...) { /// An exception context is needed to proper delete write buffers without finalization - release(); + releaseBuffers(); } } void onFinish() override { std::lock_guard cancel_lock(cancel_mutex); - finalize(); + finalizeBuffers(); } private: - void finalize() + void finalizeBuffers() { if (!writer) return; @@ -1827,19 +1828,27 @@ private: catch (...) { /// Stop ParallelFormattingOutputFormat correctly. - release(); + releaseBuffers(); throw; } write_buf->finalize(); } - void release() + void releaseBuffers() { writer.reset(); write_buf.reset(); } + void cancelBuffers() + { + if (writer) + writer->cancel(); + if (write_buf) + write_buf->cancel(); + } + StorageMetadataPtr metadata_snapshot; String table_name_for_log; diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index 2f850c76465..754bc096958 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -705,7 +705,7 @@ Pipe StorageGenerateRandom::read( } } - UInt64 query_limit = query_info.limit; + UInt64 query_limit = query_info.trivial_limit; if (query_limit && num_streams * max_block_size > query_limit) { /// We want to avoid spawning more streams than necessary @@ -717,7 +717,7 @@ Pipe StorageGenerateRandom::read( /// Will create more seed values for each source from initial seed. pcg64 generate(random_seed); - auto shared_state = std::make_shared(query_info.limit); + auto shared_state = std::make_shared(query_info.trivial_limit); for (UInt64 i = 0; i < num_streams; ++i) { diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 08e0526550d..de0324d7998 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -322,6 +322,10 @@ public: /// Rollback partial writes. /// No more writing. + for (auto & [_, stream] : streams) + { + stream.cancel(); + } streams.clear(); /// Truncate files to the older sizes. 
@@ -373,6 +377,12 @@ private: plain->next(); plain->finalize(); } + + void cancel() + { + compressed.cancel(); + plain->cancel(); + } }; using FileStreams = std::map; diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 735f51e1f32..316f398b476 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -273,7 +273,7 @@ void StorageMaterializedView::read( * They may be added in case of distributed query with JOIN. * In that case underlying table returns joined columns as well. */ - converting_actions->projectInput(false); + converting_actions->removeUnusedActions(); auto converting_step = std::make_unique(query_plan.getCurrentDataStream(), converting_actions); converting_step->setStepDescription("Convert target table structure to MaterializedView structure"); query_plan.addStep(std::move(converting_step)); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index ed3f43367dd..2dbe82c92d8 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -889,6 +889,8 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo SelectQueryInfo modified_query_info = query_info; + modified_query_info.merge_storage_snapshot = merge_storage_snapshot; + if (modified_query_info.planner_context) modified_query_info.planner_context = std::make_shared(modified_context, modified_query_info.planner_context); @@ -974,7 +976,7 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo } PlannerActionsVisitor actions_visitor(modified_query_info.planner_context, false /*use_column_identifier_as_action_node_name*/); - actions_visitor.visit(filter_actions_dag, column_node); + actions_visitor.visit(*filter_actions_dag, column_node); } column_names_as_aliases = filter_actions_dag->getRequiredColumnsNames(); if (column_names_as_aliases.empty()) @@ -1198,7 +1200,10 @@ ReadFromMerge::ChildPlan ReadFromMerge::createPlanForTable( if (allow_experimental_analyzer) { - InterpreterSelectQueryAnalyzer interpreter(modified_query_info.query_tree, + /// Converting query to AST because types might be different in the source table. + /// Need to resolve types again. 
+ auto ast = modified_query_info.query_tree->toAST(); + InterpreterSelectQueryAnalyzer interpreter(ast, modified_context, SelectQueryOptions(processed_stage)); @@ -1480,7 +1485,7 @@ void ReadFromMerge::convertAndFilterSourceStream( query_analysis_pass.run(query_tree, local_context); PlannerActionsVisitor actions_visitor(modified_query_info.planner_context, false /*use_column_identifier_as_action_node_name*/); - const auto & nodes = actions_visitor.visit(actions_dag, query_tree); + const auto & nodes = actions_visitor.visit(*actions_dag, query_tree); if (nodes.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected to have 1 output but got {}", nodes.size()); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 9352f772ce1..9255ee00340 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1269,6 +1269,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( if (command.type != MutationCommand::Type::DROP_COLUMN && command.type != MutationCommand::Type::DROP_INDEX && command.type != MutationCommand::Type::DROP_PROJECTION + && command.type != MutationCommand::Type::DROP_STATISTICS && command.type != MutationCommand::Type::RENAME_COLUMN) { commands_for_size_validation.push_back(command); diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index 9379cb5a1c6..a8713c61e4d 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -35,9 +35,12 @@ #include #include +#include +#include #include #include +#include #include #include @@ -106,28 +109,79 @@ ColumnsDescription StoragePostgreSQL::getTableStructureFromData( return ColumnsDescription{columns_info->columns}; } -Pipe StoragePostgreSQL::read( - const Names & column_names_, +namespace +{ + +class ReadFromPostgreSQL : public SourceStepWithFilter +{ +public: + ReadFromPostgreSQL( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + size_t max_block_size_, + String remote_table_schema_, + String remote_table_name_, + postgres::ConnectionHolderPtr connection_) + : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) + , logger(getLogger("ReadFromPostgreSQL")) + , max_block_size(max_block_size_) + , remote_table_schema(remote_table_schema_) + , remote_table_name(remote_table_name_) + , connection(std::move(connection_)) + { + } + + std::string getName() const override { return "ReadFromPostgreSQL"; } + + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override + { + std::optional transform_query_limit; + if (limit && !filter_actions_dag) + transform_query_limit = limit; + + /// Connection is already made to the needed database, so it should not be present in the query; + /// remote_table_schema is empty if it is not specified, will access only table_name. 
+ String query = transformQueryForExternalDatabase( + query_info, + required_source_columns, + storage_snapshot->metadata->getColumns().getOrdinary(), + IdentifierQuotingStyle::DoubleQuotes, + LiteralEscapingStyle::PostgreSQL, + remote_table_schema, + remote_table_name, + context, + transform_query_limit); + LOG_TRACE(logger, "Query: {}", query); + + pipeline.init(Pipe(std::make_shared>(std::move(connection), query, getOutputStream().header, max_block_size))); + } + + LoggerPtr logger; + size_t max_block_size; + String remote_table_schema; + String remote_table_name; + postgres::ConnectionHolderPtr connection; +}; + +} + +void StoragePostgreSQL::read( + QueryPlan & query_plan, + const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info_, - ContextPtr context_, + SelectQueryInfo & query_info, + ContextPtr local_context, QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size_, + size_t max_block_size, size_t /*num_streams*/) { - storage_snapshot->check(column_names_); - - /// Connection is already made to the needed database, so it should not be present in the query; - /// remote_table_schema is empty if it is not specified, will access only table_name. - String query = transformQueryForExternalDatabase( - query_info_, - column_names_, - storage_snapshot->metadata->getColumns().getOrdinary(), - IdentifierQuotingStyle::DoubleQuotes, LiteralEscapingStyle::PostgreSQL, remote_table_schema, remote_table_name, context_); - LOG_TRACE(log, "Query: {}", query); + storage_snapshot->check(column_names); Block sample_block; - for (const String & column_name : column_names_) + for (const String & column_name : column_names) { auto column_data = storage_snapshot->metadata->getColumns().getPhysical(column_name); WhichDataType which(column_data.type); @@ -136,7 +190,17 @@ Pipe StoragePostgreSQL::read( sample_block.insert({ column_data.type, column_data.name }); } - return Pipe(std::make_shared>(pool->get(), query, sample_block, max_block_size_)); + auto reading = std::make_unique( + column_names, + query_info, + storage_snapshot, + local_context, + sample_block, + max_block_size, + remote_table_schema, + remote_table_name, + pool->get()); + query_plan.addStep(std::move(reading)); } diff --git a/src/Storages/StoragePostgreSQL.h b/src/Storages/StoragePostgreSQL.h index 1ed4f7a7611..a8fa22f71b2 100644 --- a/src/Storages/StoragePostgreSQL.h +++ b/src/Storages/StoragePostgreSQL.h @@ -37,11 +37,12 @@ public: String getName() const override { return "PostgreSQL"; } - Pipe read( + void read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, - ContextPtr context, + ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, size_t num_streams) override; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index db58d0081c6..a127384c03c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5656,7 +5656,7 @@ std::optional StorageReplicatedMergeTree::distributedWriteFromClu { auto connection = std::make_shared( node.host_name, node.port, query_context->getGlobalContext()->getCurrentDatabase(), - node.user, node.password, SSHKey(), node.quota_key, node.cluster, node.cluster_secret, + node.user, node.password, SSHKey(), /*jwt*/"", node.quota_key, node.cluster, node.cluster_secret, "ParallelInsertSelectInititiator", node.compression, node.secure 
diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index a8c8e81e23d..5b7f9fc0ac2 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -97,8 +97,7 @@ void SetOrJoinSink::onFinish() if (persistent) { backup_stream.flush(); - compressed_backup_buf.next(); - backup_buf->next(); + compressed_backup_buf.finalize(); backup_buf->finalize(); table.disk->replaceFile(fs::path(backup_tmp_path) / backup_file_name, fs::path(backup_path) / backup_file_name); diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index f0c5103d657..8df87d6290f 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -207,7 +207,10 @@ public: /// Rollback partial writes. /// No more writing. + data_out->cancel(); data_out.reset(); + + data_out_compressed->cancel(); data_out_compressed.reset(); /// Truncate files to the older sizes. @@ -233,8 +236,7 @@ public: if (done) return; - data_out->next(); - data_out_compressed->next(); + data_out->finalize(); data_out_compressed->finalize(); /// Save the new indices. @@ -494,8 +496,7 @@ void StorageStripeLog::saveIndices(const WriteLock & /* already locked for writi for (size_t i = start; i != num_indices; ++i) indices.blocks[i].write(*index_out); - index_out->next(); - index_out_compressed->next(); + index_out->finalize(); index_out_compressed->finalize(); num_indices_saved = num_indices; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index f8424bc3d1b..895da028fc2 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -576,31 +576,25 @@ void StorageURLSink::consume(Chunk chunk) void StorageURLSink::onCancel() { std::lock_guard lock(cancel_mutex); - finalize(); + cancelBuffers(); + releaseBuffers(); cancelled = true; } -void StorageURLSink::onException(std::exception_ptr exception) +void StorageURLSink::onException(std::exception_ptr) { std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } + cancelBuffers(); + releaseBuffers(); } void StorageURLSink::onFinish() { std::lock_guard lock(cancel_mutex); - finalize(); + finalizeBuffers(); } -void StorageURLSink::finalize() +void StorageURLSink::finalizeBuffers() { if (!writer) return; @@ -613,19 +607,27 @@ void StorageURLSink::finalize() catch (...) { /// Stop ParallelFormattingOutputFormat correctly. 
- release(); + releaseBuffers(); throw; } write_buf->finalize(); } -void StorageURLSink::release() +void StorageURLSink::releaseBuffers() { writer.reset(); write_buf.reset(); } +void StorageURLSink::cancelBuffers() +{ + if (writer) + writer->cancel(); + if (write_buf) + write_buf->cancel(); +} + class PartitionedStorageURLSink : public PartitionedSink { public: diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index f550ccb2bc4..3090f8db12e 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -257,8 +257,10 @@ public: void onFinish() override; private: - void finalize(); - void release(); + void finalizeBuffers(); + void releaseBuffers(); + void cancelBuffers(); + std::unique_ptr write_buf; OutputFormatPtr writer; std::mutex cancel_mutex; diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 49da1eba9ec..8dd8d3b6154 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -298,7 +298,7 @@ private: ClientInfo::Interface client_info_interface; size_t db_table_num = 0; size_t total_tables; - std::shared_ptr access; + std::shared_ptr access; bool need_to_check_access_for_tables; String query_id; std::chrono::milliseconds lock_acquire_timeout; diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index b42b070d518..9201eef185f 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -194,6 +194,7 @@ const char * auto_contributors[] { "Artem Gavrilov", "Artem Hnilov", "Artem Konovalov", + "Artem Mustafin", "Artem Pershin", "Artem Streltsov", "Artem Zuikov", @@ -307,6 +308,7 @@ const char * auto_contributors[] { "Daniil Ivanik", "Daniil Rubin", "Danila Kutenin", + "Danila Puzov", "Daniël van Eeden", "Dao", "Dao Minh Thuc", @@ -417,6 +419,7 @@ const char * auto_contributors[] { "Filippov Denis", "Fille", "Flowyi", + "Francesco Ciocchetti", "Francisco Barón", "Francisco Javier Jurado Moreno", "Frank Chen", @@ -449,6 +452,7 @@ const char * auto_contributors[] { "Gleb-Tretyakov", "GoGoWen2021", "Gregory", + "Grigorii Sokolik", "Grigory", "Grigory Buteyko", "Grigory Pervakov", @@ -464,6 +468,7 @@ const char * auto_contributors[] { "Hamoon", "Han Fei", "Han Shukai", + "HappenLee", "Harry Lee", "Harry-Lee", "HarryLeeIBM", @@ -627,6 +632,7 @@ const char * auto_contributors[] { "Kostiantyn Storozhuk", "Kozlov Ivan", "KrJin", + "Kris Buytaert", "Krisztián Szűcs", "Kruglov Pavel", "Krzysztof Góralski", @@ -644,6 +650,7 @@ const char * auto_contributors[] { "Latysheva Alexandra", "Laurie Li", "LaurieLY", + "Lee sungju", "Lemore", "Leonardo Cecchi", "Leonardo Maciel", @@ -770,6 +777,7 @@ const char * auto_contributors[] { "Mikhail Filimonov", "Mikhail Fursov", "Mikhail Gaidamaka", + "Mikhail Gorshkov", "Mikhail Guzov", "Mikhail Korotov", "Mikhail Koviazin", @@ -904,11 +912,13 @@ const char * auto_contributors[] { "Petr Vasilev", "Pham Anh Tuan", "Philip Hallstrom", + "Philipp Schreiber", "Philippe Ombredanne", "PigInCloud", "Potya", "Pradeep Chhetri", "Prashant Shahi", + "Pratima Patel", "Priyansh Agrawal", "Pxl", "Pysaoke", @@ -978,6 +988,7 @@ const char * auto_contributors[] { "Samuel Colvin", "San", "Sanjam Panda", + "Sariel", "Saulius Valatka", "Sean Haynes", "Sean Lafferty", @@ -1067,6 +1078,7 @@ const char * auto_contributors[] { "TABLUM.IO", "TAC", "TCeason", + "TTPO100AJIEX", "Tagir 
Kuskarov", "Tai White", "Taleh Zaliyev", @@ -1089,6 +1101,7 @@ const char * auto_contributors[] { "Tiaonmmn", "Tigran Khudaverdyan", "Tim Liou", + "Tim MacDonald", "Tim Windelschmidt", "Timur Magomedov", "Timur Solodovnikov", @@ -1201,6 +1214,7 @@ const char * auto_contributors[] { "Xiaofei Hu", "Xin Wang", "Xoel Lopez Barata", + "Xu Jia", "Xudong Zhang", "Y Lu", "Yakko Majuri", @@ -1237,6 +1251,7 @@ const char * auto_contributors[] { "Yusuke Tanaka", "Zach Naimon", "Zheng Miao", + "ZhiHong Zhang", "ZhiYong Wang", "Zhichang Yu", "Zhichun Wu", @@ -1276,6 +1291,7 @@ const char * auto_contributors[] { "alexeyerm", "alexeypavlenko", "alfredlu", + "allegrinisante", "amesaru", "amoschen", "amudong", @@ -1287,6 +1303,7 @@ const char * auto_contributors[] { "anneji", "anneji-dev", "annvsh", + "anonymous", "anrodigina", "antikvist", "anton", @@ -1346,6 +1363,7 @@ const char * auto_contributors[] { "chenxing-xc", "chenxing.xc", "chertus", + "chloro", "chou.fan", "christophe.kalenzaga", "clarkcaoliu", @@ -1458,6 +1476,7 @@ const char * auto_contributors[] { "gyuton", "hanqf-git", "hao.he", + "haohang", "hardstep33", "hchen9", "hcz", @@ -1479,6 +1498,7 @@ const char * auto_contributors[] { "iammagicc", "ianton-ru", "ice1x", + "iceFireser", "idfer", "ifinik", "igomac", @@ -1642,6 +1662,7 @@ const char * auto_contributors[] { "mo-avatar", "mochi", "monchickey", + "morning-color", "morty", "moscas", "mosinnik", @@ -1695,6 +1716,7 @@ const char * auto_contributors[] { "philip.han", "pingyu", "pkubaj", + "pn", "potya", "pppeace", "presto53", @@ -1742,6 +1764,7 @@ const char * auto_contributors[] { "sanjam", "santaux", "santrancisco", + "sarielwxm", "satanson", "save-my-heart", "sdk2", @@ -1846,6 +1869,7 @@ const char * auto_contributors[] { "whysage", "wineternity", "woodlzm", + "wudidapaopao", "wuxiaobai24", "wxybear", "wzl", @@ -1860,6 +1884,7 @@ const char * auto_contributors[] { "xleoken", "xlwh", "xmy", + "xogoodnow", "xuelei", "xuzifu666", "yakkomajuri", diff --git a/src/Storages/System/StorageSystemS3Queue.cpp b/src/Storages/System/StorageSystemS3Queue.cpp index 637182067f2..c1d686067fd 100644 --- a/src/Storages/System/StorageSystemS3Queue.cpp +++ b/src/Storages/System/StorageSystemS3Queue.cpp @@ -11,9 +11,9 @@ #include #include #include -#include -#include -#include +#include +#include +#include #include @@ -26,6 +26,7 @@ ColumnsDescription StorageSystemS3Queue::getColumnsDescription() return ColumnsDescription { {"zookeeper_path", std::make_shared(), "Path in zookeeper to S3Queue metadata"}, + {"file_path", std::make_shared(), "File path of a file which is being processed by S3Queue"}, {"file_name", std::make_shared(), "File name of a file which is being processed by S3Queue"}, {"rows_processed", std::make_shared(), "Currently processed number of rows"}, {"status", std::make_shared(), "Status of processing: Processed, Processing, Failed"}, @@ -43,13 +44,14 @@ StorageSystemS3Queue::StorageSystemS3Queue(const StorageID & table_id_) void StorageSystemS3Queue::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { - for (const auto & [zookeeper_path, metadata] : S3QueueMetadataFactory::instance().getAll()) + for (const auto & [zookeeper_path, metadata] : ObjectStorageQueueMetadataFactory::instance().getAll()) { - for (const auto & [file_name, file_status] : metadata->getFileStatuses()) + for (const auto & [file_path, file_status] : metadata->getFileStatuses()) { size_t i = 0; res_columns[i++]->insert(zookeeper_path); - res_columns[i++]->insert(file_name); + 
res_columns[i++]->insert(file_path); + res_columns[i++]->insert(std::filesystem::path(file_path).filename().string()); res_columns[i++]->insert(file_status->processed_rows.load()); res_columns[i++]->insert(magic_enum::enum_name(file_status->state.load())); diff --git a/src/Storages/System/StorageSystemScheduler.cpp b/src/Storages/System/StorageSystemScheduler.cpp index 651ca815420..339a59e88a5 100644 --- a/src/Storages/System/StorageSystemScheduler.cpp +++ b/src/Storages/System/StorageSystemScheduler.cpp @@ -12,7 +12,6 @@ #include #include #include -#include "Common/Scheduler/ResourceRequest.h" namespace DB diff --git a/src/Storages/System/StorageSystemServerSettings.cpp b/src/Storages/System/StorageSystemServerSettings.cpp index 2e848f68850..d242b6de4ec 100644 --- a/src/Storages/System/StorageSystemServerSettings.cpp +++ b/src/Storages/System/StorageSystemServerSettings.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -81,7 +82,11 @@ void StorageSystemServerSettings::fillData(MutableColumns & res_columns, Context {"uncompressed_cache_size", {std::to_string(context->getUncompressedCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}}, {"index_mark_cache_size", {std::to_string(context->getIndexMarkCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}}, {"index_uncompressed_cache_size", {std::to_string(context->getIndexUncompressedCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}}, - {"mmap_cache_size", {std::to_string(context->getMMappedFileCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}} + {"mmap_cache_size", {std::to_string(context->getMMappedFileCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}}, + + {"merge_workload", {context->getMergeWorkload(), ChangeableWithoutRestart::Yes}}, + {"mutation_workload", {context->getMutationWorkload(), ChangeableWithoutRestart::Yes}}, + {"config_reload_interval_ms", {std::to_string(context->getConfigReloaderInterval()), ChangeableWithoutRestart::Yes}} }; if (context->areBackgroundExecutorsInitialized()) diff --git a/src/Storages/System/StorageSystemSettingsChanges.cpp b/src/Storages/System/StorageSystemSettingsChanges.cpp index de47ec52031..d6c83870741 100644 --- a/src/Storages/System/StorageSystemSettingsChanges.cpp +++ b/src/Storages/System/StorageSystemSettingsChanges.cpp @@ -26,6 +26,7 @@ ColumnsDescription StorageSystemSettingsChanges::getColumnsDescription() void StorageSystemSettingsChanges::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { + const auto & settings_changes_history = getSettingsChangesHistory(); for (auto it = settings_changes_history.rbegin(); it != settings_changes_history.rend(); ++it) { res_columns[0]->insert(it->first.toString()); diff --git a/src/Storages/System/StorageSystemZeros.cpp b/src/Storages/System/StorageSystemZeros.cpp index 09a2bb5d963..0720a2f24d9 100644 --- a/src/Storages/System/StorageSystemZeros.cpp +++ b/src/Storages/System/StorageSystemZeros.cpp @@ -109,8 +109,8 @@ Pipe StorageSystemZeros::read( storage_snapshot->check(column_names); UInt64 query_limit = limit ? *limit : 0; - if (query_info.limit) - query_limit = query_limit ? std::min(query_limit, query_info.limit) : query_info.limit; + if (query_info.trivial_limit) + query_limit = query_limit ? 
std::min(query_limit, query_info.trivial_limit) : query_info.trivial_limit; if (query_limit && query_limit < max_block_size) max_block_size = query_limit; diff --git a/src/Storages/System/StorageSystemZooKeeperConnection.cpp b/src/Storages/System/StorageSystemZooKeeperConnection.cpp index 950e20512c0..ec29b84dac3 100644 --- a/src/Storages/System/StorageSystemZooKeeperConnection.cpp +++ b/src/Storages/System/StorageSystemZooKeeperConnection.cpp @@ -36,7 +36,8 @@ ColumnsDescription StorageSystemZooKeeperConnection::getColumnsDescription() /* 9 */ {"xid", std::make_shared(), "XID of the current session."}, /* 10*/ {"enabled_feature_flags", std::make_shared(std::move(feature_flags_enum)), "Feature flags which are enabled. Only applicable to ClickHouse Keeper." - } + }, + /* 11*/ {"availability_zone", std::make_shared(), "Availability zone"}, }; } @@ -85,6 +86,7 @@ void StorageSystemZooKeeperConnection::fillData(MutableColumns & res_columns, Co columns[8]->insert(zookeeper->getClientID()); columns[9]->insert(zookeeper->getConnectionXid()); add_enabled_feature_flags(zookeeper); + columns[11]->insert(zookeeper->getConnectedHostAvailabilityZone()); } }; diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 8bca1c97aad..77e6ee9cb24 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include #include @@ -633,7 +633,7 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) }); builder.addSimpleTransform([&](const Block & current_header) { - return std::make_shared( + return std::make_shared( current_header, getContext()->getSettingsRef().min_insert_block_size_rows, getContext()->getSettingsRef().min_insert_block_size_bytes); @@ -1068,9 +1068,10 @@ void StorageWindowView::threadFuncFireProc() if (max_watermark >= timestamp_now) clean_cache_task->schedule(); + UInt64 next_fire_ms = static_cast(next_fire_signal) * 1000; UInt64 timestamp_ms = static_cast(Poco::Timestamp().epochMicroseconds()) / 1000; if (!shutdown_called) - fire_task->scheduleAfter(std::max(UInt64(0), static_cast(next_fire_signal) * 1000 - timestamp_ms)); + fire_task->scheduleAfter(next_fire_ms - std::min(next_fire_ms, timestamp_ms)); } void StorageWindowView::threadFuncFireEvent() @@ -1532,7 +1533,7 @@ void StorageWindowView::writeIntoWindowView( builder = select_block.buildQueryPipeline(); builder.addSimpleTransform([&](const Block & current_header) { - return std::make_shared( + return std::make_shared( current_header, local_context->getSettingsRef().min_insert_block_size_rows, local_context->getSettingsRef().min_insert_block_size_bytes); diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index 131712e750a..ed378169381 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -290,7 +290,7 @@ TableNodePtr executeSubqueryNode(const QueryTreeNodePtr & subquery_node, size_t min_block_size_rows = mutable_context->getSettingsRef().min_external_table_block_size_rows; size_t min_block_size_bytes = mutable_context->getSettingsRef().min_external_table_block_size_bytes; - auto squashing = std::make_shared(builder->getHeader(), min_block_size_rows, min_block_size_bytes); + auto squashing = std::make_shared(builder->getHeader(), min_block_size_rows, min_block_size_bytes); 
builder->resize(1); builder->addTransform(std::move(squashing)); diff --git a/src/Storages/examples/CMakeLists.txt b/src/Storages/examples/CMakeLists.txt index b4786b7313b..4f221efbd2b 100644 --- a/src/Storages/examples/CMakeLists.txt +++ b/src/Storages/examples/CMakeLists.txt @@ -5,4 +5,4 @@ clickhouse_add_executable (merge_selector2 merge_selector2.cpp) target_link_libraries (merge_selector2 PRIVATE dbms) clickhouse_add_executable (get_current_inserts_in_replicated get_current_inserts_in_replicated.cpp) -target_link_libraries (get_current_inserts_in_replicated PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper) +target_link_libraries (get_current_inserts_in_replicated PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper clickhouse_functions) diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index 47542b7b47e..628e5a85437 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -34,6 +34,7 @@ void registerStorageFuzzJSON(StorageFactory & factory); void registerStorageS3(StorageFactory & factory); void registerStorageHudi(StorageFactory & factory); void registerStorageS3Queue(StorageFactory & factory); +void registerStorageAzureQueue(StorageFactory & factory); #if USE_PARQUET void registerStorageDeltaLake(StorageFactory & factory); @@ -126,6 +127,10 @@ void registerStorages() registerStorageFuzzJSON(factory); #endif +#if USE_AZURE_BLOB_STORAGE + registerStorageAzureQueue(factory); +#endif + #if USE_AWS_S3 registerStorageHudi(factory); registerStorageS3Queue(factory); diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index afc458ea612..fc85bde11d9 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -288,7 +288,8 @@ String transformQueryForExternalDatabaseImpl( LiteralEscapingStyle literal_escaping_style, const String & database, const String & table, - ContextPtr context) + ContextPtr context, + std::optional limit) { bool strict = context->getSettingsRef().external_table_strict_query; @@ -374,6 +375,9 @@ String transformQueryForExternalDatabaseImpl( select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(original_where)); } + if (limit) + select->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::make_shared(*limit)); + ASTPtr select_ptr = select; dropAliases(select_ptr); @@ -399,7 +403,8 @@ String transformQueryForExternalDatabase( LiteralEscapingStyle literal_escaping_style, const String & database, const String & table, - ContextPtr context) + ContextPtr context, + std::optional limit) { if (!query_info.syntax_analyzer_result) { @@ -424,7 +429,8 @@ String transformQueryForExternalDatabase( literal_escaping_style, database, table, - context); + context, + limit); } auto clone_query = query_info.query->clone(); @@ -436,7 +442,8 @@ String transformQueryForExternalDatabase( literal_escaping_style, database, table, - context); + context, + limit); } } diff --git a/src/Storages/transformQueryForExternalDatabase.h b/src/Storages/transformQueryForExternalDatabase.h index fb6af21907e..2cd7e3676b5 100644 --- a/src/Storages/transformQueryForExternalDatabase.h +++ b/src/Storages/transformQueryForExternalDatabase.h @@ -21,6 +21,8 @@ class IAST; * and WHERE contains subset of (AND-ed) conditions from original query, * that contain only compatible expressions. * + * If limit is passed additionally apply LIMIT in result query. 
+ * * Compatible expressions are comparisons of identifiers, constants, and logical operations on them. * * Throws INCORRECT_QUERY if external_table_strict_query (from context settings) @@ -34,6 +36,7 @@ String transformQueryForExternalDatabase( LiteralEscapingStyle literal_escaping_style, const String & database, const String & table, - ContextPtr context); + ContextPtr context, + std::optional limit = {}); } diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index 1f7357b6494..bd92465e1aa 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -1,13 +1,4 @@ -00725_memory_tracking 01624_soft_constraints 02354_vector_search_queries -02901_parallel_replicas_rollup -02999_scalar_subqueries_bug_2 -# Flaky list -01825_type_json_in_array -01414_mutations_and_errors_zookeeper -01287_max_execution_speed # Check after ConstantNode refactoring -02154_parser_backtracking 02944_variant_as_common_type -02942_variant_cast diff --git a/tests/ci/artifacts_helper.py b/tests/ci/artifacts_helper.py index 37abf0bdefb..503ba2e1ec4 100644 --- a/tests/ci/artifacts_helper.py +++ b/tests/ci/artifacts_helper.py @@ -15,7 +15,7 @@ from github.Commit import Commit from build_download_helper import download_build_with_progress from commit_status_helper import post_commit_status from compress_files import SUFFIX, compress_fast, decompress_fast -from env_helper import CI, RUNNER_TEMP, S3_BUILDS_BUCKET +from env_helper import IS_CI, RUNNER_TEMP, S3_BUILDS_BUCKET from git_helper import SHA_REGEXP from report import FOOTER_HTML_TEMPLATE, HEAD_HTML_TEMPLATE, SUCCESS from s3_helper import S3Helper @@ -131,7 +131,7 @@ class ArtifactsHelper: post_commit_status(commit, SUCCESS, url, "Artifacts for workflow", "Artifacts") def _regenerate_index(self) -> None: - if CI: + if IS_CI: files = self._get_s3_objects() else: files = self._get_local_s3_objects() diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index b88a9476a6d..8bc0f51dfc7 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -6,7 +6,7 @@ import subprocess import sys from pathlib import Path -from build_download_helper import get_build_name_for_check, read_build_urls +from build_download_helper import read_build_urls from clickhouse_helper import CiLogsCredentials from docker_images_helper import DockerImage, get_docker_image, pull_image from env_helper import REPORT_PATH, TEMP_PATH @@ -14,6 +14,7 @@ from pr_info import PRInfo from report import FAIL, FAILURE, OK, SUCCESS, JobReport, TestResult from stopwatch import Stopwatch from tee_popen import TeePopen +from ci_config import CI IMAGE_NAME = "clickhouse/fuzzer" @@ -64,7 +65,7 @@ def main(): docker_image = pull_image(get_docker_image(IMAGE_NAME)) - build_name = get_build_name_for_check(check_name) + build_name = CI.get_required_build_name(check_name) urls = read_build_urls(build_name, reports_path) if not urls: raise ValueError("No build URLs found") diff --git a/tests/ci/bugfix_validate_check.py b/tests/ci/bugfix_validate_check.py index d41fdaf05ff..71b18572938 100644 --- a/tests/ci/bugfix_validate_check.py +++ b/tests/ci/bugfix_validate_check.py @@ -7,7 +7,7 @@ import sys from pathlib import Path from typing import List, Sequence, Tuple -from ci_config import JobNames +from ci_config import CI from ci_utils import normalize_string from env_helper import TEMP_PATH from functional_test_check import NO_CHANGES_MSG @@ -92,16 +92,19 @@ def main(): logging.basicConfig(level=logging.INFO) # args = parse_args() stopwatch = 
Stopwatch() - jobs_to_validate = [JobNames.STATELESS_TEST_RELEASE, JobNames.INTEGRATION_TEST] + jobs_to_validate = [ + CI.JobNames.STATELESS_TEST_RELEASE, + CI.JobNames.INTEGRATION_TEST, + ] functional_job_report_file = Path(TEMP_PATH) / "functional_test_job_report.json" integration_job_report_file = Path(TEMP_PATH) / "integration_test_job_report.json" jobs_report_files = { - JobNames.STATELESS_TEST_RELEASE: functional_job_report_file, - JobNames.INTEGRATION_TEST: integration_job_report_file, + CI.JobNames.STATELESS_TEST_RELEASE: functional_job_report_file, + CI.JobNames.INTEGRATION_TEST: integration_job_report_file, } jobs_scripts = { - JobNames.STATELESS_TEST_RELEASE: "functional_test_check.py", - JobNames.INTEGRATION_TEST: "integration_test_check.py", + CI.JobNames.STATELESS_TEST_RELEASE: "functional_test_check.py", + CI.JobNames.INTEGRATION_TEST: "integration_test_check.py", } for test_job in jobs_to_validate: diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 260b77b0ee5..39f34ed9ccf 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -9,7 +9,7 @@ from pathlib import Path from typing import Tuple import docker_images_helper -from ci_config import CI_CONFIG, BuildConfig +from ci_config import CI from env_helper import REPO_COPY, S3_BUILDS_BUCKET, TEMP_PATH from git_helper import Git from lambda_shared_package.lambda_shared.pr import Labels @@ -27,7 +27,7 @@ IMAGE_NAME = "clickhouse/binary-builder" BUILD_LOG_NAME = "build_log.log" -def _can_export_binaries(build_config: BuildConfig) -> bool: +def _can_export_binaries(build_config: CI.BuildConfig) -> bool: if build_config.package_type != "deb": return False if build_config.sanitizer != "": @@ -38,7 +38,7 @@ def _can_export_binaries(build_config: BuildConfig) -> bool: def get_packager_cmd( - build_config: BuildConfig, + build_config: CI.BuildConfig, packager_path: Path, output_path: Path, build_version: str, @@ -147,7 +147,8 @@ def main(): stopwatch = Stopwatch() build_name = args.build_name - build_config = CI_CONFIG.build_config[build_name] + build_config = CI.JOB_CONFIGS[build_name].build_config + assert build_config temp_path = Path(TEMP_PATH) temp_path.mkdir(parents=True, exist_ok=True) diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index a641f9f4544..8482abb26e0 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -10,7 +10,7 @@ from typing import Any, Callable, List, Optional, Union import requests -from ci_config import CI_CONFIG +from ci_config import CI try: # A work around for scripts using this downloading module without required deps @@ -122,10 +122,6 @@ def get_gh_api( raise APIException(f"Unable to request data from GH API: {url}") from exc -def get_build_name_for_check(check_name: str) -> str: - return CI_CONFIG.test_configs[check_name].required_build - - def read_build_urls(build_name: str, reports_path: Union[Path, str]) -> List[str]: for root, _, files in os.walk(reports_path): for file in files: @@ -210,7 +206,7 @@ def download_builds_filter( result_path: Path, filter_fn: Callable[[str], bool] = lambda _: True, ) -> None: - build_name = get_build_name_for_check(check_name) + build_name = CI.get_required_build_name(check_name) urls = read_build_urls(build_name, reports_path) logger.info("The build report for %s contains the next URLs: %s", build_name, urls) @@ -247,7 +243,7 @@ def download_clickhouse_binary( def get_clickhouse_binary_url( check_name: str, reports_path: Union[Path, str] ) -> Optional[str]: - 
build_name = get_build_name_for_check(check_name) + build_name = CI.get_required_build_name(check_name) urls = read_build_urls(build_name, reports_path) logger.info("The build report for %s contains the next URLs: %s", build_name, urls) for url in urls: diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 1d734fbb3f8..04c8d12fc30 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +import argparse import json import logging import os @@ -6,7 +7,6 @@ import sys from pathlib import Path from typing import List -from ci_config import CI_CONFIG, Build from env_helper import ( GITHUB_JOB_URL, GITHUB_REPOSITORY, @@ -14,7 +14,7 @@ from env_helper import ( REPORT_PATH, TEMP_PATH, CI_CONFIG_PATH, - CI, + IS_CI, ) from pr_info import PRInfo from report import ( @@ -25,8 +25,10 @@ from report import ( JobReport, create_build_html_report, get_worst_status, + FAILURE, ) from stopwatch import Stopwatch +from ci_config import CI # Old way to read the neads_data NEEDS_DATA_PATH = os.getenv("NEEDS_DATA_PATH", "") @@ -46,16 +48,13 @@ def main(): "\n ".join(p.as_posix() for p in reports_path.rglob("*.json")), ) - build_check_name = sys.argv[1] + build_check_name = CI.JobNames.BUILD_CHECK pr_info = PRInfo() - builds_for_check = CI_CONFIG.get_builds_for_report( - build_check_name, - release=pr_info.is_release, - backport=pr_info.head_ref.startswith("backport/"), - ) - if CI: + args = parse_args() + + if (CI_CONFIG_PATH or IS_CI) and not args.reports: # In CI only specific builds might be manually selected, or some wf does not build all builds. # Filtering @builds_for_check to verify only builds that are present in the current CI workflow with open(CI_CONFIG_PATH, encoding="utf-8") as jfd: @@ -64,8 +63,15 @@ def main(): ci_config["jobs_data"]["jobs_to_skip"] + ci_config["jobs_data"]["jobs_to_do"] ) - builds_for_check = [job for job in builds_for_check if job in all_ci_jobs] - print(f"NOTE: following build reports will be accounted: [{builds_for_check}]") + builds_for_check = [job for job in CI.BuildNames if job in all_ci_jobs] + print("NOTE: builds for check taken from ci configuration") + else: + builds_for_check = parse_args().reports + for job in builds_for_check: + assert job in CI.BuildNames, "Builds must be known build job names" + print("NOTE: builds for check taken from input arguments") + + print(f"NOTE: following build reports will be checked: [{builds_for_check}]") required_builds = len(builds_for_check) missing_builds = 0 @@ -77,8 +83,8 @@ def main(): build_name, pr_info.number, pr_info.head_ref ) if not build_result: - if build_name == Build.FUZZERS: - logging.info("Build [%s] is missing - skip", Build.FUZZERS) + if build_name == CI.BuildNames.FUZZERS: + logging.info("Build [%s] is missing - skip", CI.BuildNames.FUZZERS) continue logging.warning("Build results for %s is missing", build_name) build_result = BuildResult.missing_result("missing") @@ -132,17 +138,16 @@ def main(): # Check if there are no builds at all, do not override bad status if summary_status == SUCCESS: if missing_builds: - summary_status = PENDING + summary_status = FAILURE elif ok_groups == 0: summary_status = ERROR - addition = "" - if missing_builds: - addition = ( - f" ({required_builds - missing_builds} of {required_builds} builds are OK)" - ) + description = "" - description = f"{ok_groups}/{total_groups} artifact groups are OK{addition}" + if missing_builds: + description = f"{missing_builds} of {required_builds} builds are 
missing." + + description += f" {ok_groups}/{total_groups} artifact groups are OK" JobReport( description=description, @@ -158,5 +163,16 @@ def main(): sys.exit(1) +def parse_args(): + parser = argparse.ArgumentParser("Generates overall build report") + + parser.add_argument( + "--reports", + nargs="+", + help="List of build reports to check", + ) + return parser.parse_args() + + if __name__ == "__main__": main() diff --git a/tests/ci/changelog.py b/tests/ci/changelog.py index fcb61d3f605..3ba618f3ae5 100755 --- a/tests/ci/changelog.py +++ b/tests/ci/changelog.py @@ -33,10 +33,11 @@ from version_helper import ( categories_preferred_order = ( "Backward Incompatible Change", "New Feature", + "Experimental Feature", "Performance Improvement", "Improvement", - "Critical Bug Fix", - "Bug Fix", + "Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)", + "Bug Fix (user-visible misbehavior in an official stable release)", "Build/Testing/Packaging Improvement", "Other", ) @@ -205,7 +206,7 @@ def generate_description(item: PullRequest, repo: Repository) -> Optional[Descri try: item = gh.get_pull_cached(repo, int(branch_parts[-1])) except Exception as e: - logging.warning("unable to get backpoted PR, exception: %s", e) + logging.warning("unable to get backported PR, exception: %s", e) else: logging.warning( "The branch %s doesn't match backport template, using PR %s as is", @@ -280,12 +281,17 @@ def generate_description(item: PullRequest, repo: Repository) -> Optional[Descri category, ): category = "NOT FOR CHANGELOG / INSIGNIFICANT" - entry = item.title + # Sometimes we declare not for changelog but still write a description. Keep it + if len(entry) <= 4 or "Documentation entry" in entry: + entry = item.title # Normalize bug fixes - if re.match( - r"(?i)bug\Wfix", - category, + if ( + re.match( + r"(?i)bug\Wfix", + category, + ) + and "Critical Bug Fix" not in category ): category = "Bug Fix (user-visible misbehavior in an official stable release)" diff --git a/tests/ci/ci.py b/tests/ci/ci.py index bb23de142df..af2f4c0a1fc 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -13,14 +13,7 @@ from typing import Any, Dict, List, Optional import docker_images_helper import upload_result_helper from build_check import get_release_or_pr -from ci_config import ( - CI_CONFIG, - Build, - CILabels, - CIStages, - JobNames, - StatusNames, -) +from ci_config import CI from ci_metadata import CiMetadata from ci_utils import GHActions, normalize_string from clickhouse_helper import ( @@ -38,10 +31,11 @@ from commit_status_helper import ( get_commit, post_commit_status, set_status_comment, + get_commit_filtered_statuses, ) from digest_helper import DockerDigester from env_helper import ( - CI, + IS_CI, GITHUB_JOB_API_URL, GITHUB_REPOSITORY, GITHUB_RUN_ID, @@ -295,7 +289,7 @@ def _mark_success_action( batch: int, ) -> None: ci_cache = CiCache(s3, indata["jobs_data"]["digests"]) - job_config = CI_CONFIG.get_job_config(job) + job_config = CI.get_job_config(job) num_batches = job_config.num_batches # if batch is not provided - set to 0 batch = 0 if batch == -1 else batch @@ -305,7 +299,7 @@ def _mark_success_action( # FIXME: find generic design for propagating and handling job status (e.g. 
stop using statuses in GH api) # now job ca be build job w/o status data, any other job that exit with 0 with or w/o status data - if CI_CONFIG.is_build_job(job): + if CI.is_build_job(job): # there is no CommitStatus for build jobs # create dummy status relying on JobReport # FIXME: consider creating commit status for build jobs too, to treat everything the same way @@ -425,6 +419,7 @@ def _configure_jobs( pr_info: PRInfo, ci_settings: CiSettings, skip_jobs: bool, + dry_run: bool = False, ) -> CiCache: """ returns CICache instance with configured job's data @@ -436,10 +431,11 @@ def _configure_jobs( # get all jobs if not skip_jobs: - job_configs = CI_CONFIG.get_workflow_jobs_with_configs( + job_configs = CI.get_workflow_jobs_with_configs( is_mq=pr_info.is_merge_queue, is_docs_only=pr_info.has_changes_in_documentation_only(), is_master=pr_info.is_master, + is_pr=pr_info.is_pr, ) else: job_configs = {} @@ -457,7 +453,8 @@ def _configure_jobs( ci_cache = CiCache.calc_digests_and_create( s3, job_configs, - cache_enabled=not ci_settings.no_ci_cache and not skip_jobs and CI, + cache_enabled=not ci_settings.no_ci_cache and not skip_jobs and IS_CI, + dry_run=dry_run, ) ci_cache.update() ci_cache.apply(job_configs, is_release=pr_info.is_release) @@ -465,7 +462,9 @@ def _configure_jobs( return ci_cache -def _generate_ci_stage_config(jobs_data: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: +def _generate_ci_stage_config( + jobs_data: Dict[str, Any], non_blocking_mode: bool = False +) -> Dict[str, Dict[str, Any]]: """ populates GH Actions' workflow with real jobs "Builds_1": [{"job_name": NAME, "runner_type": RUNNER_TYPE}] @@ -475,14 +474,14 @@ def _generate_ci_stage_config(jobs_data: Dict[str, Any]) -> Dict[str, Dict[str, result = {} # type: Dict[str, Any] stages_to_do = [] for job in jobs_data: - stage_type = CI_CONFIG.get_job_ci_stage(job) - if stage_type == CIStages.NA: + stage_type = CI.get_job_ci_stage(job, non_blocking_ci=non_blocking_mode) + if stage_type == CI.WorkflowStages.NA: continue if stage_type not in result: result[stage_type] = [] stages_to_do.append(stage_type) result[stage_type].append( - {"job_name": job, "runner_type": CI_CONFIG.get_runner_type(job)} + {"job_name": job, "runner_type": CI.JOB_CONFIGS[job].runner_type} ) result["stages_to_do"] = stages_to_do return result @@ -529,10 +528,10 @@ def _update_gh_statuses_action(indata: Dict, s3: S3Helper) -> None: if job not in jobs_to_skip and job not in jobs_to_do: # no need to create status for job that are not supposed to be executed continue - if CI_CONFIG.is_build_job(job): + if CI.is_build_job(job): # no GH status for build jobs continue - job_config = CI_CONFIG.get_job_config(job) + job_config = CI.get_job_config(job) if not job_config: # there might be a new job that does not exist on this branch - skip it continue @@ -558,7 +557,7 @@ def _fetch_commit_tokens(message: str, pr_info: PRInfo) -> List[str]: res = [ match for match in matches - if match in CILabels or match.startswith("job_") or match.startswith("batch_") + if match in CI.Tags or match.startswith("job_") or match.startswith("batch_") ] print(f"CI modifiers from commit message: [{res}]") res_2 = [] @@ -567,7 +566,7 @@ def _fetch_commit_tokens(message: str, pr_info: PRInfo) -> List[str]: res_2 = [ match for match in matches - if match in CILabels + if match in CI.Tags or match.startswith("job_") or match.startswith("batch_") ] @@ -643,7 +642,7 @@ def _upload_build_artifacts( print(f"Report file has been uploaded to [{report_url}]") # Upload master head's binaries - 
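# A hedged sketch of the stage layout _generate_ci_stage_config() is expected to
# emit for GH Actions (values below are assumed examples; the real mapping comes
# from CI.get_job_ci_stage() and CI.JOB_CONFIGS[job].runner_type):
example_stages_data = {
    "stages_to_do": ["Builds_1", "Tests_1"],
    "Builds_1": [
        {"job_name": "package_release", "runner_type": "builder"},
    ],
    "Tests_1": [
        {"job_name": "Stateless tests (release)", "runner_type": "func-tester"},
    ],
}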
static_bin_name = CI_CONFIG.build_config[build_name].static_binary_name + static_bin_name = CI.get_build_config(build_name).static_binary_name if pr_info.is_master and static_bin_name: # Full binary with debug info: s3_path_full = "/".join((pr_info.base_ref, static_bin_name, "clickhouse-full")) @@ -838,15 +837,15 @@ def _add_build_to_version_history( def _run_test(job_name: str, run_command: str) -> int: assert ( - run_command or CI_CONFIG.get_job_config(job_name).run_command + run_command or CI.get_job_config(job_name).run_command ), "Run command must be provided as input argument or be configured in job config" env = os.environ.copy() - timeout = CI_CONFIG.get_job_config(job_name).timeout or None + timeout = CI.get_job_config(job_name).timeout or None if not run_command: run_command = "/".join( - (os.path.dirname(__file__), CI_CONFIG.get_job_config(job_name).run_command) + (os.path.dirname(__file__), CI.get_job_config(job_name).run_command) ) if ".py" in run_command and not run_command.startswith("python"): run_command = "python3 " + run_command @@ -913,13 +912,23 @@ def _cancel_pr_wf(s3: S3Helper, pr_number: int, cancel_sync: bool = False) -> No def _set_pending_statuses(pr_info: PRInfo) -> None: commit = get_commit(GitHub(get_best_robot_token(), per_page=100), pr_info.sha) try: - print("Set SYNC status to pending") - commit.create_status( - state=PENDING, - target_url="", - description="", - context=StatusNames.SYNC, - ) + found = False + statuses = get_commit_filtered_statuses(commit) + for commit_status in statuses: + if commit_status.context == CI.StatusNames.SYNC: + print( + f"Sync status found [{commit_status.state}], [{commit_status.description}] - won't be overwritten" + ) + found = True + break + if not found: + print("Set Sync status to pending") + commit.create_status( + state=PENDING, + target_url="", + description=CI.SyncState.PENDING, + context=CI.StatusNames.SYNC, + ) except Exception as ex: print(f"ERROR: failed to set GH commit status, ex: {ex}") @@ -952,7 +961,7 @@ def main() -> int: ### CONFIGURE action: start if args.configure: - if CI and pr_info.is_pr: + if IS_CI and pr_info.is_pr: # store meta on s3 (now we need it only for PRs) meta = CiMetadata(s3, pr_info.number, pr_info.head_ref) meta.run_id = int(GITHUB_RUN_ID) @@ -962,7 +971,7 @@ def main() -> int: args.commit_message or None, update_from_api=True ) - if ci_settings.no_merge_commit and CI: + if ci_settings.no_merge_commit and IS_CI: git_runner.run(f"{GIT_PREFIX} checkout {pr_info.sha}") git_ref = git_runner.run(f"{GIT_PREFIX} rev-parse HEAD") @@ -985,21 +994,24 @@ def main() -> int: ) ci_cache.print_status() - if CI and not pr_info.is_merge_queue: + if IS_CI and not pr_info.is_merge_queue: # wait for pending jobs to be finished, await_jobs is a long blocking call ci_cache.await_pending_jobs(pr_info.is_release) if pr_info.is_release: + print("Release/master: CI Cache add pending records for all todo jobs") ci_cache.push_pending_all(pr_info.is_release) # conclude results result["git_ref"] = git_ref result["version"] = version - result["build"] = ci_cache.job_digests[Build.PACKAGE_RELEASE] - result["docs"] = ci_cache.job_digests[JobNames.DOCS_CHECK] + result["build"] = ci_cache.job_digests[CI.BuildNames.PACKAGE_RELEASE] + result["docs"] = ci_cache.job_digests[CI.JobNames.DOCS_CHECK] result["ci_settings"] = ci_settings.as_dict() if not args.skip_jobs: - result["stages_data"] = _generate_ci_stage_config(ci_cache.jobs_to_do) + result["stages_data"] = _generate_ci_stage_config( + ci_cache.jobs_to_do, 
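# A hedged sketch of the new config lookup: build settings are read through
# CI.get_build_config() (or CI.JOB_CONFIGS[name].build_config) instead of the
# removed CI_CONFIG.build_config[name]. The values in the trailing comment are
# the ones declared for package_release further down in ci_config.py.
from ci_config import CI

cfg = CI.get_build_config(CI.BuildNames.PACKAGE_RELEASE)
print(cfg.compiler, cfg.package_type, cfg.static_binary_name)  # clang-18 deb amd64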
ci_settings.woolen_wolfdog + ) result["jobs_data"] = { "jobs_to_do": list(ci_cache.jobs_to_do), "jobs_to_skip": ci_cache.jobs_to_skip, @@ -1027,7 +1039,7 @@ def main() -> int: f"Check if rerun for name: [{check_name}], extended name [{check_name_with_group}]" ) previous_status = None - if CI_CONFIG.is_build_job(check_name): + if CI.is_build_job(check_name): # this is a build job - check if a build report is present build_result = ( BuildResult.load_any(check_name, pr_info.number, pr_info.head_ref) @@ -1053,25 +1065,25 @@ def main() -> int: ) # rerun helper check - # FIXME: remove rerun_helper check and rely on ci cache only + # FIXME: Find a way to identify if job restarted manually (by developer) or by automatic workflow restart (died spot-instance) + # disable rerun check for the former if check_name not in ( - # we might want to rerun reports' jobs - disable rerun check for them - JobNames.BUILD_CHECK, - JobNames.BUILD_CHECK_SPECIAL, - ): + CI.JobNames.BUILD_CHECK, + ): # we might want to rerun build report job rerun_helper = RerunHelper(commit, check_name_with_group) if rerun_helper.is_already_finished_by_status(): + print("WARNING: Rerunning job with GH status ") status = rerun_helper.get_finished_status() assert status - previous_status = status.state print("::group::Commit Status") print(status) print("::endgroup::") + previous_status = status.state # ci cache check if not previous_status and not ci_settings.no_ci_cache: ci_cache = CiCache(s3, indata["jobs_data"]["digests"]).update() - job_config = CI_CONFIG.get_job_config(check_name) + job_config = CI.get_job_config(check_name) if ci_cache.is_successful( check_name, args.batch, @@ -1111,7 +1123,7 @@ def main() -> int: ch_helper = ClickHouseHelper() check_url = "" - if CI_CONFIG.is_build_job(args.job_name): + if CI.is_build_job(args.job_name): assert ( indata ), f"--infile with config must be provided for POST action of a build type job [{args.job_name}]" @@ -1119,8 +1131,7 @@ def main() -> int: # upload binaries only for normal builds in PRs upload_binary = ( not pr_info.is_pr - or args.job_name - not in CI_CONFIG.get_builds_for_report(JobNames.BUILD_CHECK_SPECIAL) + or CI.get_job_ci_stage(args.job_name) == CI.WorkflowStages.BUILDS_1 or CiSettings.create_from_run_config(indata).upload_all ) diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 56a84272a63..8ee0ae54385 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -5,7 +5,8 @@ from enum import Enum from pathlib import Path from typing import Dict, Optional, Any, Union, Sequence, List, Set -from ci_config import JobNames, Build, CI_CONFIG, JobConfig +from ci_config import CI + from ci_utils import is_hex, GHActions from commit_status_helper import CommitStatusData from env_helper import ( @@ -41,7 +42,7 @@ class CiCache: release - for jobs being executed on the release branch including master branch (not a PR branch) """ - _REQUIRED_DIGESTS = [JobNames.DOCS_CHECK, Build.PACKAGE_RELEASE] + _REQUIRED_DIGESTS = [CI.JobNames.DOCS_CHECK, CI.BuildNames.PACKAGE_RELEASE] _S3_CACHE_PREFIX = "CI_cache_v1" _CACHE_BUILD_REPORT_PREFIX = "build_report" _RECORD_FILE_EXTENSION = ".ci" @@ -80,7 +81,7 @@ class CiCache: @classmethod def is_docs_job(cls, job_name: str) -> bool: - return job_name == JobNames.DOCS_CHECK + return job_name == CI.JobNames.DOCS_CHECK @classmethod def is_srcs_job(cls, job_name: str) -> bool: @@ -105,8 +106,8 @@ class CiCache: ): self.enabled = cache_enabled self.jobs_to_skip = [] # type: List[str] - self.jobs_to_wait = {} # type: Dict[str, JobConfig] 
- self.jobs_to_do = {} # type: Dict[str, JobConfig] + self.jobs_to_wait = {} # type: Dict[str, CI.JobConfig] + self.jobs_to_do = {} # type: Dict[str, CI.JobConfig] self.s3 = s3 self.job_digests = job_digests self.cache_s3_paths = { @@ -127,9 +128,13 @@ class CiCache: @classmethod def calc_digests_and_create( - cls, s3: S3Helper, job_configs: Dict[str, JobConfig], cache_enabled: bool = True + cls, + s3: S3Helper, + job_configs: Dict[str, CI.JobConfig], + cache_enabled: bool = True, + dry_run: bool = False, ) -> "CiCache": - job_digester = JobDigester() + job_digester = JobDigester(dry_run=dry_run) digests = {} print("::group::Job Digests") @@ -140,9 +145,7 @@ class CiCache: for job in cls._REQUIRED_DIGESTS: if job not in job_configs: - digest = job_digester.get_job_digest( - CI_CONFIG.get_job_config(job).digest - ) + digest = job_digester.get_job_digest(CI.get_job_config(job).digest) digests[job] = digest print( f" job [{job.rjust(50)}] required for CI Cache has digest [{digest}]" @@ -154,10 +157,10 @@ class CiCache: self, job_digests: Dict[str, str], job_type: JobType ) -> str: if job_type == self.JobType.DOCS: - res = job_digests[JobNames.DOCS_CHECK] + res = job_digests[CI.JobNames.DOCS_CHECK] elif job_type == self.JobType.SRCS: - if Build.PACKAGE_RELEASE in job_digests: - res = job_digests[Build.PACKAGE_RELEASE] + if CI.BuildNames.PACKAGE_RELEASE in job_digests: + res = job_digests[CI.BuildNames.PACKAGE_RELEASE] else: assert False, "BUG, no build job in digest' list" else: @@ -648,7 +651,7 @@ class CiCache: report_path = Path(REPORT_PATH) report_path.mkdir(exist_ok=True, parents=True) path = ( - self._get_record_s3_path(Build.PACKAGE_RELEASE) + self._get_record_s3_path(CI.BuildNames.PACKAGE_RELEASE) + self._CACHE_BUILD_REPORT_PREFIX ) if file_prefix: @@ -664,13 +667,14 @@ class CiCache: def upload_build_report(self, build_result: BuildResult) -> str: result_json_path = build_result.write_json(Path(TEMP_PATH)) s3_path = ( - self._get_record_s3_path(Build.PACKAGE_RELEASE) + result_json_path.name + self._get_record_s3_path(CI.BuildNames.PACKAGE_RELEASE) + + result_json_path.name ) return self.s3.upload_file( bucket=S3_BUILDS_BUCKET, file_path=result_json_path, s3_path=s3_path ) - def await_pending_jobs(self, is_release: bool) -> None: + def await_pending_jobs(self, is_release: bool, dry_run: bool = False) -> None: """ await pending jobs to be finished @jobs_with_params - jobs to await. 
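# A hedged sketch of the new dry_run hook in CiCache.calc_digests_and_create()
# (assumed wiring; "s3" stands for the usual S3Helper instance): with dry_run=True
# the JobDigester presumably produces stub digests and await_pending_jobs() skips
# its sleep, so the cache flow can be exercised without real waits.
from ci_cache import CiCache
from ci_config import CI

def make_dry_run_cache(s3, job_configs=None):
    job_configs = job_configs or {
        CI.JobNames.DOCS_CHECK: CI.get_job_config(CI.JobNames.DOCS_CHECK),
        CI.BuildNames.PACKAGE_RELEASE: CI.get_job_config(CI.BuildNames.PACKAGE_RELEASE),
    }
    return CiCache.calc_digests_and_create(
        s3, job_configs, cache_enabled=False, dry_run=True
    )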
{JOB_NAME: {"batches": [BATCHES...], "num_batches": NUM_BATCHES}} @@ -687,15 +691,9 @@ class CiCache: MAX_JOB_NUM_TO_WAIT = 3 round_cnt = 0 - # FIXME: temporary experiment: lets enable await for PR' workflows awaiting on build' jobs only + # FIXME: temporary experiment: lets enable await for PR' workflows but for a shorter time if not is_release: - MAX_ROUNDS_TO_WAIT = 1 - remove_from_wait = [] - for job in self.jobs_to_wait: - if job not in Build: - remove_from_wait.append(job) - for job in remove_from_wait: - del self.jobs_to_wait[job] + MAX_ROUNDS_TO_WAIT = 3 while ( len(self.jobs_to_wait) > MAX_JOB_NUM_TO_WAIT @@ -713,11 +711,12 @@ class CiCache: start_at = int(time.time()) while expired_sec < TIMEOUT and self.jobs_to_wait: await_finished: Set[str] = set() - time.sleep(poll_interval_sec) + if not dry_run: + time.sleep(poll_interval_sec) self.update() for job_name, job_config in self.jobs_to_wait.items(): num_batches = job_config.num_batches - job_config = CI_CONFIG.get_job_config(job_name) + job_config = CI.get_job_config(job_name) assert job_config.pending_batches assert job_config.batches pending_batches = list(job_config.pending_batches) @@ -741,12 +740,11 @@ class CiCache: f"Job [{job_name}_[{batch}/{num_batches}]] is not pending anymore" ) job_config.batches.remove(batch) - job_config.pending_batches.remove(batch) else: print( f"NOTE: Job [{job_name}:{batch}] finished failed - do not add to ready" ) - job_config.pending_batches.remove(batch) + job_config.pending_batches.remove(batch) if not job_config.pending_batches: await_finished.add(job_name) @@ -754,18 +752,25 @@ class CiCache: for job in await_finished: self.jobs_to_skip.append(job) del self.jobs_to_wait[job] + del self.jobs_to_do[job] - expired_sec = int(time.time()) - start_at - print( - f"...awaiting continues... seconds left [{TIMEOUT - expired_sec}]" - ) + if not dry_run: + expired_sec = int(time.time()) - start_at + print( + f"...awaiting continues... 
seconds left [{TIMEOUT - expired_sec}]" + ) + else: + # make up for 2 iterations in dry_run + expired_sec += int(TIMEOUT / 2) + 1 GHActions.print_in_group( "Remaining jobs:", [list(self.jobs_to_wait)], ) - def apply(self, job_configs: Dict[str, JobConfig], is_release: bool) -> "CiCache": + def apply( + self, job_configs: Dict[str, CI.JobConfig], is_release: bool + ) -> "CiCache": if not self.enabled: self.jobs_to_do = job_configs return self diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 6ab1eb8bac4..bef43083a35 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -1,749 +1,621 @@ -#!/usr/bin/env python3 - -import logging import random import re from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser -from copy import deepcopy -from dataclasses import dataclass, field -from pathlib import Path -from typing import Callable, Dict, Iterable, List, Literal, Optional, Union +from typing import Dict, Optional, List -from ci_utils import WithIter, normalize_string -from integration_test_images import IMAGES +from ci_utils import normalize_string +from ci_definitions import * -class WorkFlows(metaclass=WithIter): - PULL_REQUEST = "PULL_REQUEST" - MASTER = "MASTER" - BACKPORT = "BACKPORT" - RELEASE = "RELEASE" - SYNC = "SYNC" - - -class CIStages(metaclass=WithIter): - NA = "UNKNOWN" - BUILDS_1 = "Builds_1" - BUILDS_2 = "Builds_2" - TESTS_1 = "Tests_1" - TESTS_2 = "Tests_2" - TESTS_3 = "Tests_3" - - -class Runners(metaclass=WithIter): - BUILDER = "builder" - STYLE_CHECKER = "style-checker" - STYLE_CHECKER_ARM = "style-checker-aarch64" - FUNC_TESTER = "func-tester" - FUNC_TESTER_ARM = "func-tester-aarch64" - STRESS_TESTER = "stress-tester" - FUZZER_UNIT_TESTER = "fuzzer-unit-tester" - - -class CILabels(metaclass=WithIter): - """ - Label names or commit tokens in normalized form - """ - - DO_NOT_TEST_LABEL = "do_not_test" - NO_MERGE_COMMIT = "no_merge_commit" - NO_CI_CACHE = "no_ci_cache" - # to upload all binaries from build jobs - UPLOAD_ALL_ARTIFACTS = "upload_all" - CI_SET_SYNC = "ci_set_sync" - CI_SET_ARM = "ci_set_arm" - CI_SET_REQUIRED = "ci_set_required" - CI_SET_NORMAL_BUILDS = "ci_set_normal_builds" - CI_SET_SPECIAL_BUILDS = "ci_set_special_builds" - CI_SET_NON_REQUIRED = "ci_set_non_required" - CI_SET_OLD_ANALYZER = "ci_set_old_analyzer" - - libFuzzer = "libFuzzer" - - -class Build(metaclass=WithIter): - PACKAGE_RELEASE = "package_release" - PACKAGE_AARCH64 = "package_aarch64" - PACKAGE_ASAN = "package_asan" - PACKAGE_UBSAN = "package_ubsan" - PACKAGE_TSAN = "package_tsan" - PACKAGE_MSAN = "package_msan" - PACKAGE_DEBUG = "package_debug" - PACKAGE_RELEASE_COVERAGE = "package_release_coverage" - BINARY_RELEASE = "binary_release" - BINARY_TIDY = "binary_tidy" - BINARY_DARWIN = "binary_darwin" - BINARY_AARCH64 = "binary_aarch64" - BINARY_AARCH64_V80COMPAT = "binary_aarch64_v80compat" - BINARY_FREEBSD = "binary_freebsd" - BINARY_DARWIN_AARCH64 = "binary_darwin_aarch64" - BINARY_PPC64LE = "binary_ppc64le" - BINARY_AMD64_COMPAT = "binary_amd64_compat" - BINARY_AMD64_MUSL = "binary_amd64_musl" - BINARY_RISCV64 = "binary_riscv64" - BINARY_S390X = "binary_s390x" - BINARY_LOONGARCH64 = "binary_loongarch64" - FUZZERS = "fuzzers" - - -class JobNames(metaclass=WithIter): - STYLE_CHECK = "Style check" - FAST_TEST = "Fast test" - DOCKER_SERVER = "Docker server image" - DOCKER_KEEPER = "Docker keeper image" - INSTALL_TEST_AMD = "Install packages (amd64)" - INSTALL_TEST_ARM = "Install packages (arm64)" - - STATELESS_TEST_DEBUG = "Stateless tests (debug)" - 
STATELESS_TEST_RELEASE = "Stateless tests (release)" - STATELESS_TEST_RELEASE_COVERAGE = "Stateless tests (coverage)" - STATELESS_TEST_AARCH64 = "Stateless tests (aarch64)" - STATELESS_TEST_ASAN = "Stateless tests (asan)" - STATELESS_TEST_TSAN = "Stateless tests (tsan)" - STATELESS_TEST_MSAN = "Stateless tests (msan)" - STATELESS_TEST_UBSAN = "Stateless tests (ubsan)" - STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE = ( - "Stateless tests (release, old analyzer, s3, DatabaseReplicated)" - ) - # merged into STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE: - # STATELESS_TEST_OLD_ANALYZER_RELEASE = "Stateless tests (release, analyzer)" - # STATELESS_TEST_DB_REPL_RELEASE = "Stateless tests (release, DatabaseReplicated)" - # STATELESS_TEST_S3_RELEASE = "Stateless tests (release, s3 storage)" - STATELESS_TEST_S3_DEBUG = "Stateless tests (debug, s3 storage)" - STATELESS_TEST_S3_TSAN = "Stateless tests (tsan, s3 storage)" - STATELESS_TEST_AZURE_ASAN = "Stateless tests (azure, asan)" - STATELESS_TEST_FLAKY_ASAN = "Stateless tests flaky check (asan)" - - STATEFUL_TEST_DEBUG = "Stateful tests (debug)" - STATEFUL_TEST_RELEASE = "Stateful tests (release)" - STATEFUL_TEST_RELEASE_COVERAGE = "Stateful tests (coverage)" - STATEFUL_TEST_AARCH64 = "Stateful tests (aarch64)" - STATEFUL_TEST_ASAN = "Stateful tests (asan)" - STATEFUL_TEST_TSAN = "Stateful tests (tsan)" - STATEFUL_TEST_MSAN = "Stateful tests (msan)" - STATEFUL_TEST_UBSAN = "Stateful tests (ubsan)" - STATEFUL_TEST_PARALLEL_REPL_RELEASE = "Stateful tests (release, ParallelReplicas)" - STATEFUL_TEST_PARALLEL_REPL_DEBUG = "Stateful tests (debug, ParallelReplicas)" - STATEFUL_TEST_PARALLEL_REPL_ASAN = "Stateful tests (asan, ParallelReplicas)" - STATEFUL_TEST_PARALLEL_REPL_MSAN = "Stateful tests (msan, ParallelReplicas)" - STATEFUL_TEST_PARALLEL_REPL_UBSAN = "Stateful tests (ubsan, ParallelReplicas)" - STATEFUL_TEST_PARALLEL_REPL_TSAN = "Stateful tests (tsan, ParallelReplicas)" - - STRESS_TEST_ASAN = "Stress test (asan)" - STRESS_TEST_TSAN = "Stress test (tsan)" - STRESS_TEST_UBSAN = "Stress test (ubsan)" - STRESS_TEST_MSAN = "Stress test (msan)" - STRESS_TEST_DEBUG = "Stress test (debug)" - STRESS_TEST_AZURE_TSAN = "Stress test (azure, tsan)" - STRESS_TEST_AZURE_MSAN = "Stress test (azure, msan)" - - INTEGRATION_TEST = "Integration tests (release)" - INTEGRATION_TEST_ASAN = "Integration tests (asan)" - INTEGRATION_TEST_ASAN_OLD_ANALYZER = "Integration tests (asan, old analyzer)" - INTEGRATION_TEST_TSAN = "Integration tests (tsan)" - INTEGRATION_TEST_ARM = "Integration tests (aarch64)" - INTEGRATION_TEST_FLAKY = "Integration tests flaky check (asan)" - - UPGRADE_TEST_DEBUG = "Upgrade check (debug)" - UPGRADE_TEST_ASAN = "Upgrade check (asan)" - UPGRADE_TEST_TSAN = "Upgrade check (tsan)" - UPGRADE_TEST_MSAN = "Upgrade check (msan)" - - UNIT_TEST = "Unit tests (release)" - UNIT_TEST_ASAN = "Unit tests (asan)" - UNIT_TEST_MSAN = "Unit tests (msan)" - UNIT_TEST_TSAN = "Unit tests (tsan)" - UNIT_TEST_UBSAN = "Unit tests (ubsan)" - - AST_FUZZER_TEST_DEBUG = "AST fuzzer (debug)" - AST_FUZZER_TEST_ASAN = "AST fuzzer (asan)" - AST_FUZZER_TEST_MSAN = "AST fuzzer (msan)" - AST_FUZZER_TEST_TSAN = "AST fuzzer (tsan)" - AST_FUZZER_TEST_UBSAN = "AST fuzzer (ubsan)" - - JEPSEN_KEEPER = "ClickHouse Keeper Jepsen" - JEPSEN_SERVER = "ClickHouse Server Jepsen" - - PERFORMANCE_TEST_AMD64 = "Performance Comparison" - PERFORMANCE_TEST_ARM64 = "Performance Comparison Aarch64" - - SQL_LOGIC_TEST = "Sqllogic test (release)" - - SQLANCER = "SQLancer (release)" - 
SQLANCER_DEBUG = "SQLancer (debug)" - SQLTEST = "SQLTest" - - COMPATIBILITY_TEST = "Compatibility check (amd64)" - COMPATIBILITY_TEST_ARM = "Compatibility check (aarch64)" - - CLICKBENCH_TEST = "ClickBench (amd64)" - CLICKBENCH_TEST_ARM = "ClickBench (aarch64)" - - LIBFUZZER_TEST = "libFuzzer tests" - - BUILD_CHECK = "ClickHouse build check" - BUILD_CHECK_SPECIAL = "ClickHouse special build check" - - DOCS_CHECK = "Docs check" - BUGFIX_VALIDATE = "Bugfix validation" - - -class StatusNames(metaclass=WithIter): - "Class with statuses that aren't related to particular jobs" - CI = "CI running" - MERGEABLE = "Mergeable Check" - SYNC = "A Sync" - - -# dynamically update JobName with Build jobs -for attr_name in dir(Build): - if not attr_name.startswith("__") and not callable(getattr(Build, attr_name)): - setattr(JobNames, attr_name, getattr(Build, attr_name)) - - -@dataclass -class DigestConfig: - # all files, dirs to include into digest, glob supported - include_paths: List[Union[str, Path]] = field(default_factory=list) - # file suffixes to exclude from digest - exclude_files: List[str] = field(default_factory=list) - # directories to exclude from digest - exclude_dirs: List[Union[str, Path]] = field(default_factory=list) - # docker names to include into digest - docker: List[str] = field(default_factory=list) - # git submodules digest - git_submodules: bool = False - - -@dataclass -class LabelConfig: - """ - configures different CI scenarios per GH label - """ - - run_jobs: Iterable[str] = frozenset() - - -@dataclass -class JobConfig: - """ - contains config parameters for job execution in CI workflow - """ - - # configures digest calculation for the job - digest: DigestConfig = field(default_factory=DigestConfig) - # will be triggered for the job if omitted in CI workflow yml - run_command: str = "" - # job timeout, seconds - timeout: Optional[int] = None - # sets number of batches for a multi-batch job - num_batches: int = 1 - # label that enables job in CI, if set digest isn't used - run_by_label: str = "" - # to run always regardless of the job digest or/and label - run_always: bool = False - # if the job needs to be run on the release branch, including master (building packages, docker server). - # NOTE: Subsequent runs on the same branch with the similar digest are still considered skip-able. - required_on_release_branch: bool = False - # job is for pr workflow only - pr_only: bool = False - # job is for release/master branches only - release_only: bool = False - # to randomly pick and run one job among jobs in the same @random_bucket (PR branches only). - random_bucket: str = "" - # Do not set it. A list of batches to run. It will be set in runtime in accordance with ci cache and ci settings - batches: Optional[List[int]] = None - # Do not set it. A list of batches to await. 
It will be set in runtime in accordance with ci cache and ci settings - pending_batches: Optional[List[int]] = None - - -builds_job_config = JobConfig( - required_on_release_branch=True, - digest=DigestConfig( - include_paths=[ - "./src", - "./contrib/*-cmake", - "./contrib/consistent-hashing", - "./contrib/murmurhash", - "./contrib/libfarmhash", - "./contrib/pdqsort", - "./contrib/cityhash102", - "./contrib/sparse-checkout", - "./contrib/libmetrohash", - "./contrib/update-submodules.sh", - "./contrib/CMakeLists.txt", - "./CMakeLists.txt", - "./PreLoad.cmake", - "./cmake", - "./base", - "./programs", - "./packages", - "./docker/packager/packager", - "./rust", - "./tests/ci/version_helper.py", - # FIXME: This is a WA to rebuild the CH and recreate the Performance.tar.zst artifact - # when there are changes in performance test scripts. - # Due to the current design of the perf test we need to rebuild CH when the performance test changes, - # otherwise the changes will not be visible in the PerformanceTest job in CI - "./tests/performance", - ], - exclude_files=[".md"], - docker=["clickhouse/binary-builder"], - git_submodules=True, - ), - run_command="build_check.py $BUILD_NAME", -) -fuzzer_build_job_config = deepcopy(builds_job_config) -fuzzer_build_job_config.run_by_label = CILabels.libFuzzer - - -@dataclass -class BuildConfig: - name: str - compiler: str - package_type: Literal["deb", "binary", "fuzzers"] - additional_pkgs: bool = False - debug_build: bool = False - coverage: bool = False - sanitizer: str = "" - tidy: bool = False - # sparse_checkout is needed only to test the option itself. - # No particular sense to use it in every build, since it slows down the job. - sparse_checkout: bool = False - comment: str = "" - static_binary_name: str = "" - job_config: JobConfig = field(default_factory=lambda: deepcopy(builds_job_config)) - - def export_env(self, export: bool = False) -> str: - def process(field_name: str, field: Union[bool, str]) -> str: - if isinstance(field, bool): - field = str(field).lower() - elif not isinstance(field, str): - field = "" - if export: - return f"export BUILD_{field_name.upper()}={repr(field)}" - return f"BUILD_{field_name.upper()}={field}" - - return "\n".join(process(k, v) for k, v in self.__dict__.items()) - - -@dataclass -class BuildReportConfig: - builds: List[str] - job_config: JobConfig = field( - default_factory=lambda: JobConfig( - run_command='build_report_check.py "$CHECK_NAME"', - digest=DigestConfig( - include_paths=[ - "./tests/ci/build_report_check.py", - "./tests/ci/upload_result_helper.py", - ], - ), - ) - ) - - -@dataclass -class TestConfig: - required_build: str - job_config: JobConfig = field(default_factory=JobConfig) - - -BuildConfigs = Dict[str, BuildConfig] -BuildsReportConfig = Dict[str, BuildReportConfig] -TestConfigs = Dict[str, TestConfig] -LabelConfigs = Dict[str, LabelConfig] - -# common digests configs -compatibility_check_digest = DigestConfig( - include_paths=["./tests/ci/compatibility_check.py"], - docker=["clickhouse/test-old-ubuntu", "clickhouse/test-old-centos"], -) -install_check_digest = DigestConfig( - include_paths=["./tests/ci/install_check.py"], - docker=["clickhouse/install-deb-test", "clickhouse/install-rpm-test"], -) -stateless_check_digest = DigestConfig( - include_paths=[ - "./tests/ci/functional_test_check.py", - "./tests/queries/0_stateless/", - "./tests/clickhouse-test", - "./tests/config", - "./tests/*.txt", - ], - exclude_files=[".md"], - docker=["clickhouse/stateless-test"], -) -stateful_check_digest = 
DigestConfig( - include_paths=[ - "./tests/ci/functional_test_check.py", - "./tests/queries/1_stateful/", - "./tests/clickhouse-test", - "./tests/config", - "./tests/*.txt", - ], - exclude_files=[".md"], - docker=["clickhouse/stateful-test"], -) - -stress_check_digest = DigestConfig( - include_paths=[ - "./tests/queries/0_stateless/", - "./tests/queries/1_stateful/", - "./tests/clickhouse-test", - "./tests/config", - "./tests/*.txt", - ], - exclude_files=[".md"], - docker=["clickhouse/stress-test"], -) -# FIXME: which tests are upgrade? just python? -upgrade_check_digest = DigestConfig( - include_paths=["./tests/ci/upgrade_check.py"], - exclude_files=[".md"], - docker=["clickhouse/upgrade-check"], -) -integration_check_digest = DigestConfig( - include_paths=[ - "./tests/ci/integration_test_check.py", - "./tests/ci/integration_tests_runner.py", - "./tests/integration/", - ], - exclude_files=[".md"], - docker=IMAGES.copy(), -) - -ast_fuzzer_check_digest = DigestConfig( - # include_paths=["./tests/ci/ast_fuzzer_check.py"], - # exclude_files=[".md"], - # docker=["clickhouse/fuzzer"], -) -unit_check_digest = DigestConfig( - include_paths=["./tests/ci/unit_tests_check.py"], - exclude_files=[".md"], - docker=["clickhouse/unit-test"], -) -perf_check_digest = DigestConfig( - include_paths=[ - "./tests/ci/performance_comparison_check.py", - "./tests/performance/", - ], - exclude_files=[".md"], - docker=["clickhouse/performance-comparison"], -) -sqllancer_check_digest = DigestConfig( - # include_paths=["./tests/ci/sqlancer_check.py"], - # exclude_files=[".md"], - # docker=["clickhouse/sqlancer-test"], -) -sqllogic_check_digest = DigestConfig( - include_paths=["./tests/ci/sqllogic_test.py"], - exclude_files=[".md"], - docker=["clickhouse/sqllogic-test"], -) -sqltest_check_digest = DigestConfig( - include_paths=["./tests/ci/sqltest.py"], - exclude_files=[".md"], - docker=["clickhouse/sqltest"], -) -bugfix_validate_check = DigestConfig( - include_paths=[ - "./tests/queries/0_stateless/", - "./tests/ci/integration_test_check.py", - "./tests/ci/functional_test_check.py", - "./tests/ci/bugfix_validate_check.py", - ], - exclude_files=[".md"], - docker=IMAGES.copy() - + [ - "clickhouse/stateless-test", - ], -) -# common test params -docker_server_job_config = JobConfig( - required_on_release_branch=True, - run_command='docker_server.py --check-name "$CHECK_NAME" --release-type head --allow-build-reuse', - digest=DigestConfig( - include_paths=[ - "tests/ci/docker_server.py", - "./docker/server", - ] - ), -) -compatibility_test_common_params = { - "digest": compatibility_check_digest, - "run_command": "compatibility_check.py", -} -stateless_test_common_params = { - "digest": stateless_check_digest, - "run_command": 'functional_test_check.py "$CHECK_NAME"', - "timeout": 10800, -} -stateful_test_common_params = { - "digest": stateful_check_digest, - "run_command": 'functional_test_check.py "$CHECK_NAME"', - "timeout": 3600, -} -stress_test_common_params = { - "digest": stress_check_digest, - "run_command": "stress_check.py", - "timeout": 9000, -} -upgrade_test_common_params = { - "digest": upgrade_check_digest, - "run_command": "upgrade_check.py", -} -astfuzzer_test_common_params = { - "digest": ast_fuzzer_check_digest, - "run_command": "ast_fuzzer_check.py", - "run_always": True, -} -integration_test_common_params = { - "digest": integration_check_digest, - "run_command": 'integration_test_check.py "$CHECK_NAME"', -} -unit_test_common_params = { - "digest": unit_check_digest, - "run_command": 
"unit_tests_check.py", -} -perf_test_common_params = { - "digest": perf_check_digest, - "run_command": "performance_comparison_check.py", -} -sqllancer_test_common_params = JobConfig( - digest=sqllancer_check_digest, - run_command="sqlancer_check.py", - release_only=True, - run_always=True, -) -sqllogic_test_params = JobConfig( - digest=sqllogic_check_digest, - run_command="sqllogic_test.py", - timeout=10800, - release_only=True, -) -sql_test_params = JobConfig( - digest=sqltest_check_digest, - run_command="sqltest.py", - timeout=10800, - release_only=True, -) -clickbench_test_params = { - "digest": DigestConfig( - include_paths=[ - "tests/ci/clickbench.py", - ], - docker=["clickhouse/clickbench"], - ), - "run_command": 'clickbench.py "$CHECK_NAME"', - "timeout": 900, -} -install_test_params = JobConfig( - digest=install_check_digest, - run_command='install_check.py "$CHECK_NAME"', - timeout=900, -) - - -@dataclass -class CIConfig: +class CI: """ Contains configs for all jobs in the CI pipeline each config item in the below dicts should be an instance of JobConfig class or inherited from it """ - build_config: BuildConfigs - builds_report_config: BuildsReportConfig - test_configs: TestConfigs - other_jobs_configs: TestConfigs - label_configs: LabelConfigs + # reimport types to CI class so that they visible as CI.* and mypy is happy + # pylint:disable=useless-import-alias,reimported,import-outside-toplevel + from ci_definitions import BuildConfig as BuildConfig + from ci_definitions import DigestConfig as DigestConfig + from ci_definitions import JobConfig as JobConfig + from ci_definitions import CheckDescription as CheckDescription + from ci_definitions import Tags as Tags + from ci_definitions import JobNames as JobNames + from ci_definitions import BuildNames as BuildNames + from ci_definitions import StatusNames as StatusNames + from ci_definitions import CHECK_DESCRIPTIONS as CHECK_DESCRIPTIONS + from ci_definitions import REQUIRED_CHECKS as REQUIRED_CHECKS + from ci_definitions import SyncState as SyncState + from ci_definitions import MQ_JOBS as MQ_JOBS + from ci_definitions import WorkflowStages as WorkflowStages + from ci_definitions import Runners as Runners # Jobs that run for doc related updates _DOCS_CHECK_JOBS = [JobNames.DOCS_CHECK, JobNames.STYLE_CHECK] - # Jobs that run in Merge Queue if it's enabled - _MQ_JOBS = [ - JobNames.STYLE_CHECK, - JobNames.FAST_TEST, - Build.BINARY_RELEASE, - JobNames.UNIT_TEST, - ] + TAG_CONFIGS = { + Tags.DO_NOT_TEST_LABEL: LabelConfig(run_jobs=[JobNames.STYLE_CHECK]), + Tags.CI_SET_ARM: LabelConfig( + run_jobs=[ + JobNames.STYLE_CHECK, + BuildNames.PACKAGE_AARCH64, + JobNames.INTEGRATION_TEST_ARM, + ] + ), + Tags.CI_SET_REQUIRED: LabelConfig(run_jobs=REQUIRED_CHECKS), + Tags.CI_SET_BUILDS: LabelConfig( + run_jobs=[JobNames.STYLE_CHECK, JobNames.BUILD_CHECK] + + [build for build in BuildNames if build != BuildNames.FUZZERS] + ), + Tags.CI_SET_NON_REQUIRED: LabelConfig( + run_jobs=[job for job in JobNames if job not in REQUIRED_CHECKS] + ), + Tags.CI_SET_OLD_ANALYZER: LabelConfig( + run_jobs=[ + JobNames.STYLE_CHECK, + JobNames.FAST_TEST, + BuildNames.PACKAGE_RELEASE, + BuildNames.PACKAGE_ASAN, + JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE, + JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER, + ] + ), + Tags.CI_SET_SYNC: LabelConfig( + run_jobs=[ + BuildNames.PACKAGE_ASAN, + JobNames.STYLE_CHECK, + JobNames.BUILD_CHECK, + JobNames.UNIT_TEST_ASAN, + JobNames.STATEFUL_TEST_ASAN, + ] + ), + } - def get_label_config(self, label_name: str) 
-> Optional[LabelConfig]: - for label, config in self.label_configs.items(): + JOB_CONFIGS: Dict[str, JobConfig] = { + BuildNames.PACKAGE_RELEASE: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.PACKAGE_RELEASE, + compiler="clang-18", + package_type="deb", + static_binary_name="amd64", + additional_pkgs=True, + ) + ), + BuildNames.PACKAGE_AARCH64: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.PACKAGE_AARCH64, + compiler="clang-18-aarch64", + package_type="deb", + static_binary_name="aarch64", + additional_pkgs=True, + ) + ), + BuildNames.PACKAGE_ASAN: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.PACKAGE_ASAN, + compiler="clang-18", + sanitizer="address", + package_type="deb", + ), + ), + BuildNames.PACKAGE_UBSAN: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.PACKAGE_UBSAN, + compiler="clang-18", + sanitizer="undefined", + package_type="deb", + ), + ), + BuildNames.PACKAGE_TSAN: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.PACKAGE_TSAN, + compiler="clang-18", + sanitizer="thread", + package_type="deb", + ), + ), + BuildNames.PACKAGE_MSAN: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.PACKAGE_MSAN, + compiler="clang-18", + sanitizer="memory", + package_type="deb", + ), + ), + BuildNames.PACKAGE_DEBUG: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.PACKAGE_DEBUG, + compiler="clang-18", + debug_build=True, + package_type="deb", + sparse_checkout=True, # Check that it works with at least one build, see also update-submodules.sh + ), + ), + BuildNames.PACKAGE_RELEASE_COVERAGE: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.PACKAGE_RELEASE_COVERAGE, + compiler="clang-18", + coverage=True, + package_type="deb", + ), + ), + BuildNames.BINARY_RELEASE: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_RELEASE, + compiler="clang-18", + package_type="binary", + ), + ), + BuildNames.BINARY_TIDY: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_TIDY, + compiler="clang-18", + debug_build=True, + package_type="binary", + static_binary_name="debug-amd64", + tidy=True, + comment="clang-tidy is used for static analysis", + ), + ), + BuildNames.BINARY_DARWIN: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_DARWIN, + compiler="clang-18-darwin", + package_type="binary", + static_binary_name="macos", + ), + ), + BuildNames.BINARY_AARCH64: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_AARCH64, + compiler="clang-18-aarch64", + package_type="binary", + ), + ), + BuildNames.BINARY_AARCH64_V80COMPAT: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_AARCH64_V80COMPAT, + compiler="clang-18-aarch64-v80compat", + package_type="binary", + static_binary_name="aarch64v80compat", + comment="For ARMv8.1 and older", + ), + ), + BuildNames.BINARY_FREEBSD: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_FREEBSD, + compiler="clang-18-freebsd", + package_type="binary", + static_binary_name="freebsd", + ), + ), + BuildNames.BINARY_DARWIN_AARCH64: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_DARWIN_AARCH64, + 
compiler="clang-18-darwin-aarch64", + package_type="binary", + static_binary_name="macos-aarch64", + ), + ), + BuildNames.BINARY_PPC64LE: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_PPC64LE, + compiler="clang-18-ppc64le", + package_type="binary", + static_binary_name="powerpc64le", + ), + ), + BuildNames.BINARY_AMD64_COMPAT: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_AMD64_COMPAT, + compiler="clang-18-amd64-compat", + package_type="binary", + static_binary_name="amd64compat", + comment="SSE2-only build", + ), + ), + BuildNames.BINARY_AMD64_MUSL: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_AMD64_MUSL, + compiler="clang-18-amd64-musl", + package_type="binary", + static_binary_name="amd64musl", + comment="Build with Musl", + ), + ), + BuildNames.BINARY_RISCV64: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_RISCV64, + compiler="clang-18-riscv64", + package_type="binary", + static_binary_name="riscv64", + ), + ), + BuildNames.BINARY_S390X: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_S390X, + compiler="clang-18-s390x", + package_type="binary", + static_binary_name="s390x", + ), + ), + BuildNames.BINARY_LOONGARCH64: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_LOONGARCH64, + compiler="clang-18-loongarch64", + package_type="binary", + static_binary_name="loongarch64", + ), + ), + BuildNames.FUZZERS: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.FUZZERS, + compiler="clang-18", + package_type="fuzzers", + ), + run_by_label=Tags.libFuzzer, + ), + JobNames.BUILD_CHECK: CommonJobConfigs.BUILD_REPORT.with_properties(), + JobNames.INSTALL_TEST_AMD: CommonJobConfigs.INSTALL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE] + ), + JobNames.INSTALL_TEST_ARM: CommonJobConfigs.INSTALL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_AARCH64], + runner_type=Runners.STYLE_CHECKER_ARM, + ), + JobNames.STATEFUL_TEST_ASAN: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN] + ), + JobNames.STATEFUL_TEST_TSAN: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_TSAN] + ), + JobNames.STATEFUL_TEST_MSAN: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_MSAN] + ), + JobNames.STATEFUL_TEST_UBSAN: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_UBSAN] + ), + JobNames.STATEFUL_TEST_DEBUG: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_DEBUG] + ), + JobNames.STATEFUL_TEST_RELEASE: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE] + ), + JobNames.STATEFUL_TEST_RELEASE_COVERAGE: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE_COVERAGE] + ), + JobNames.STATEFUL_TEST_AARCH64: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_AARCH64], + runner_type=Runners.FUNC_TESTER_ARM, + ), + JobNames.STATEFUL_TEST_PARALLEL_REPL_RELEASE: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE] + ), + JobNames.STATEFUL_TEST_PARALLEL_REPL_DEBUG: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_DEBUG] + 
), + JobNames.STATEFUL_TEST_PARALLEL_REPL_ASAN: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN], + random_bucket="parrepl_with_sanitizer", + ), + JobNames.STATEFUL_TEST_PARALLEL_REPL_MSAN: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_MSAN], + random_bucket="parrepl_with_sanitizer", + ), + JobNames.STATEFUL_TEST_PARALLEL_REPL_UBSAN: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_UBSAN], + random_bucket="parrepl_with_sanitizer", + ), + JobNames.STATEFUL_TEST_PARALLEL_REPL_TSAN: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_TSAN], + random_bucket="parrepl_with_sanitizer", + ), + JobNames.STATELESS_TEST_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN], num_batches=4 + ), + JobNames.STATELESS_TEST_TSAN: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_TSAN], num_batches=5 + ), + JobNames.STATELESS_TEST_MSAN: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_MSAN], num_batches=6 + ), + JobNames.STATELESS_TEST_UBSAN: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_UBSAN], num_batches=2 + ), + JobNames.STATELESS_TEST_DEBUG: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=5 + ), + JobNames.STATELESS_TEST_RELEASE: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE], + ), + JobNames.STATELESS_TEST_RELEASE_COVERAGE: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE_COVERAGE], num_batches=6 + ), + JobNames.STATELESS_TEST_AARCH64: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_AARCH64], + runner_type=Runners.FUNC_TESTER_ARM, + ), + JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=4 + ), + JobNames.STATELESS_TEST_S3_DEBUG: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=6 + ), + JobNames.STATELESS_TEST_AZURE_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN], num_batches=4, release_only=True + ), + JobNames.STATELESS_TEST_S3_TSAN: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_TSAN], + num_batches=5, + ), + JobNames.STRESS_TEST_DEBUG: CommonJobConfigs.STRESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_DEBUG], + ), + JobNames.STRESS_TEST_TSAN: CommonJobConfigs.STRESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_TSAN], + ), + JobNames.STRESS_TEST_ASAN: CommonJobConfigs.STRESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN], + random_bucket="stress_with_sanitizer", + ), + JobNames.STRESS_TEST_UBSAN: CommonJobConfigs.STRESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_UBSAN], + random_bucket="stress_with_sanitizer", + ), + JobNames.STRESS_TEST_MSAN: CommonJobConfigs.STRESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_MSAN], + random_bucket="stress_with_sanitizer", + ), + JobNames.STRESS_TEST_AZURE_TSAN: CommonJobConfigs.STRESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_TSAN], release_only=True + ), + JobNames.STRESS_TEST_AZURE_MSAN: 
CommonJobConfigs.STRESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_MSAN], release_only=True + ), + JobNames.UPGRADE_TEST_ASAN: CommonJobConfigs.UPGRADE_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN], + random_bucket="upgrade_with_sanitizer", + pr_only=True, + ), + JobNames.UPGRADE_TEST_TSAN: CommonJobConfigs.UPGRADE_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_TSAN], + random_bucket="upgrade_with_sanitizer", + pr_only=True, + ), + JobNames.UPGRADE_TEST_MSAN: CommonJobConfigs.UPGRADE_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_MSAN], + random_bucket="upgrade_with_sanitizer", + pr_only=True, + ), + JobNames.UPGRADE_TEST_DEBUG: CommonJobConfigs.UPGRADE_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_DEBUG], pr_only=True + ), + JobNames.INTEGRATION_TEST_ASAN: CommonJobConfigs.INTEGRATION_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN], release_only=True, num_batches=4 + ), + JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER: CommonJobConfigs.INTEGRATION_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN], num_batches=6 + ), + JobNames.INTEGRATION_TEST_TSAN: CommonJobConfigs.INTEGRATION_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_TSAN], num_batches=6 + ), + JobNames.INTEGRATION_TEST_ARM: CommonJobConfigs.INTEGRATION_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_AARCH64], + num_batches=6, + runner_type=Runners.FUNC_TESTER_ARM, + ), + JobNames.INTEGRATION_TEST: CommonJobConfigs.INTEGRATION_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE], + num_batches=4, + release_only=True, + ), + JobNames.INTEGRATION_TEST_FLAKY: CommonJobConfigs.INTEGRATION_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN], pr_only=True + ), + JobNames.COMPATIBILITY_TEST: CommonJobConfigs.COMPATIBILITY_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE], + required_on_release_branch=True, + ), + JobNames.COMPATIBILITY_TEST_ARM: CommonJobConfigs.COMPATIBILITY_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_AARCH64], + required_on_release_branch=True, + runner_type=Runners.STYLE_CHECKER_ARM, + ), + JobNames.UNIT_TEST: CommonJobConfigs.UNIT_TEST.with_properties( + required_builds=[BuildNames.BINARY_RELEASE], + ), + JobNames.UNIT_TEST_ASAN: CommonJobConfigs.UNIT_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN], + ), + JobNames.UNIT_TEST_MSAN: CommonJobConfigs.UNIT_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_MSAN], + ), + JobNames.UNIT_TEST_TSAN: CommonJobConfigs.UNIT_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_TSAN], + ), + JobNames.UNIT_TEST_UBSAN: CommonJobConfigs.UNIT_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_UBSAN], + ), + JobNames.AST_FUZZER_TEST_DEBUG: CommonJobConfigs.ASTFUZZER_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_DEBUG], + ), + JobNames.AST_FUZZER_TEST_ASAN: CommonJobConfigs.ASTFUZZER_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN], + ), + JobNames.AST_FUZZER_TEST_MSAN: CommonJobConfigs.ASTFUZZER_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_MSAN], + ), + JobNames.AST_FUZZER_TEST_TSAN: CommonJobConfigs.ASTFUZZER_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_TSAN], + ), + JobNames.AST_FUZZER_TEST_UBSAN: CommonJobConfigs.ASTFUZZER_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_UBSAN], + ), + JobNames.STATELESS_TEST_FLAKY_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( + 
required_builds=[BuildNames.PACKAGE_ASAN], pr_only=True, timeout=3600 + ), + JobNames.JEPSEN_KEEPER: JobConfig( + required_builds=[BuildNames.BINARY_RELEASE], + run_by_label="jepsen-test", + run_command="jepsen_check.py keeper", + runner_type=Runners.STYLE_CHECKER_ARM, + ), + JobNames.JEPSEN_SERVER: JobConfig( + required_builds=[BuildNames.BINARY_RELEASE], + run_by_label="jepsen-test", + run_command="jepsen_check.py server", + runner_type=Runners.STYLE_CHECKER_ARM, + ), + JobNames.PERFORMANCE_TEST_AMD64: CommonJobConfigs.PERF_TESTS.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=4 + ), + JobNames.PERFORMANCE_TEST_ARM64: CommonJobConfigs.PERF_TESTS.with_properties( + required_builds=[BuildNames.PACKAGE_AARCH64], + num_batches=4, + run_by_label="pr-performance", + runner_type=Runners.FUNC_TESTER_ARM, + ), + JobNames.SQLANCER: CommonJobConfigs.SQLLANCER_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE], + ), + JobNames.SQLANCER_DEBUG: CommonJobConfigs.SQLLANCER_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_DEBUG], + ), + JobNames.SQL_LOGIC_TEST: CommonJobConfigs.SQLLOGIC_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE], + ), + JobNames.SQLTEST: CommonJobConfigs.SQL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE], + ), + JobNames.CLICKBENCH_TEST: CommonJobConfigs.CLICKBENCH_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE], + ), + JobNames.CLICKBENCH_TEST_ARM: CommonJobConfigs.CLICKBENCH_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_AARCH64], + runner_type=Runners.FUNC_TESTER_ARM, + ), + JobNames.LIBFUZZER_TEST: JobConfig( + required_builds=[BuildNames.FUZZERS], + run_by_label=Tags.libFuzzer, + timeout=10800, + run_command='libfuzzer_test_check.py "$CHECK_NAME"', + runner_type=Runners.STYLE_CHECKER, + ), + JobNames.DOCKER_SERVER: CommonJobConfigs.DOCKER_SERVER.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE] + ), + JobNames.DOCKER_KEEPER: CommonJobConfigs.DOCKER_SERVER.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE] + ), + JobNames.DOCS_CHECK: JobConfig( + digest=DigestConfig( + include_paths=["**/*.md", "./docs", "tests/ci/docs_check.py"], + docker=["clickhouse/docs-builder"], + ), + run_command="docs_check.py", + runner_type=Runners.FUNC_TESTER, + ), + JobNames.FAST_TEST: JobConfig( + pr_only=True, + digest=DigestConfig( + include_paths=["./tests/queries/0_stateless/"], + exclude_files=[".md"], + docker=["clickhouse/fasttest"], + ), + timeout=2400, + runner_type=Runners.BUILDER, + ), + JobNames.STYLE_CHECK: JobConfig( + run_always=True, + runner_type=Runners.STYLE_CHECKER_ARM, + ), + JobNames.BUGFIX_VALIDATE: JobConfig( + run_by_label="pr-bugfix", + run_command="bugfix_validate_check.py", + timeout=900, + runner_type=Runners.STYLE_CHECKER, + ), + } + + @classmethod + def get_tag_config(cls, label_name: str) -> Optional[LabelConfig]: + for label, config in cls.TAG_CONFIGS.items(): if normalize_string(label_name) == normalize_string(label): return config return None - def get_job_ci_stage(self, job_name: str) -> str: + @classmethod + def get_job_ci_stage(cls, job_name: str, non_blocking_ci: bool = False) -> str: if job_name in [ JobNames.STYLE_CHECK, JobNames.FAST_TEST, + JobNames.JEPSEN_SERVER, JobNames.JEPSEN_KEEPER, JobNames.BUILD_CHECK, - JobNames.BUILD_CHECK_SPECIAL, ]: - # FIXME: we can't currently handle Jepsen in the Stage as it's job has concurrency directive - # BUILD_CHECK and BUILD_CHECK_SPECIAL runs not in stage 
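# A hedged sketch of tag resolution: commit-message tokens are matched
# case-insensitively through normalize_string(), so a token such as "ci_set_arm"
# (assuming the tag keeps its previous string value) resolves to the LabelConfig
# declared in TAG_CONFIGS above.
from ci_config import CI

label_config = CI.get_tag_config("ci_set_arm")
assert label_config is not None
# run_jobs should contain Style check, the aarch64 package build and the
# aarch64 integration tests, per the TAG_CONFIGS entry above
print(list(label_config.run_jobs))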
because we need them even if Builds stage failed - return CIStages.NA + return WorkflowStages.NA + stage_type = None - if self.is_build_job(job_name): - stage_type = CIStages.BUILDS_1 - if job_name in CI_CONFIG.get_builds_for_report( - JobNames.BUILD_CHECK_SPECIAL - ): - # special builds go to Build_2 stage to not delay Builds_1/Test_1 - stage_type = CIStages.BUILDS_2 - elif self.is_docs_job(job_name): - stage_type = CIStages.TESTS_1 - elif self.is_test_job(job_name): - if job_name in CI_CONFIG.test_configs: - required_build = CI_CONFIG.test_configs[job_name].required_build - assert required_build - if required_build in CI_CONFIG.get_builds_for_report( - JobNames.BUILD_CHECK - ): - stage_type = CIStages.TESTS_1 - else: - stage_type = CIStages.TESTS_2 + if cls.is_build_job(job_name): + for _job, config in cls.JOB_CONFIGS.items(): + if config.required_builds and job_name in config.required_builds: + stage_type = WorkflowStages.BUILDS_1 + break else: - stage_type = CIStages.TESTS_1 - if job_name not in REQUIRED_CHECKS: - stage_type = CIStages.TESTS_3 + stage_type = WorkflowStages.BUILDS_2 + elif cls.is_docs_job(job_name): + stage_type = WorkflowStages.TESTS_1 + elif cls.is_test_job(job_name): + if job_name in CI.JOB_CONFIGS: + if job_name in REQUIRED_CHECKS: + stage_type = WorkflowStages.TESTS_1 + else: + stage_type = WorkflowStages.TESTS_3 assert stage_type, f"BUG [{job_name}]" + if non_blocking_ci and stage_type == WorkflowStages.TESTS_3: + stage_type = WorkflowStages.TESTS_2 return stage_type - def get_job_config(self, check_name: str) -> JobConfig: - res = None - for config in ( - self.build_config, - self.builds_report_config, - self.test_configs, - self.other_jobs_configs, - ): - if check_name in config: # type: ignore - res = config[check_name].job_config # type: ignore - break - return res # type: ignore + @classmethod + def get_job_config(cls, check_name: str) -> JobConfig: + return cls.JOB_CONFIGS[check_name] - def get_runner_type(self, check_name: str) -> str: - result = None - if self.is_build_job(check_name) or check_name == JobNames.FAST_TEST: - result = Runners.BUILDER - elif any( - words in check_name.lower() - for words in [ - "install packages", - "compatibility check", - "docker", - "build check", - "jepsen", - "style check", - ] - ): - result = Runners.STYLE_CHECKER - elif check_name == JobNames.DOCS_CHECK: - # docs job is demanding - result = Runners.FUNC_TESTER_ARM - elif any( - words in check_name.lower() - for words in [ - "stateless", - "stateful", - "clickbench", - "sqllogic test", - "libfuzzer", - "bugfix validation", - ] - ): - result = Runners.FUNC_TESTER - elif any( - words in check_name.lower() - for words in ["stress", "upgrade", "integration", "performance comparison"] - ): - result = Runners.STRESS_TESTER - elif any( - words in check_name.lower() - for words in ["ast fuzzer", "unit tests", "sqlancer", "sqltest"] - ): - result = Runners.FUZZER_UNIT_TESTER + @classmethod + def get_required_build_name(cls, check_name: str) -> str: + assert check_name in cls.JOB_CONFIGS + required_builds = cls.JOB_CONFIGS[check_name].required_builds + assert required_builds and len(required_builds) == 1 + return required_builds[0] - assert result, f"BUG, no runner for [{check_name}]" - - if ( - "aarch" in check_name.lower() or "arm64" in check_name.lower() - ) and "aarch" not in result: - if result == Runners.STRESS_TESTER: - # FIXME: no arm stress tester group atm - result = Runners.FUNC_TESTER_ARM - elif result == Runners.BUILDER: - # crosscompile - no arm required - pass - 
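# A hedged sketch of the reworked stage mapping (job names assumed to keep their
# previous values and REQUIRED_CHECKS membership): required test jobs land in
# Tests_1, non-required ones in Tests_3, and Tests_3 is folded into Tests_2 when
# the non-blocking ("woolen wolfdog") mode is requested.
from ci_config import CI

stage_required = CI.get_job_ci_stage(CI.JobNames.STATELESS_TEST_RELEASE)
stage_non_blocking = CI.get_job_ci_stage(
    CI.JobNames.STATELESS_TEST_AZURE_ASAN, non_blocking_ci=True
)
print(stage_required, stage_non_blocking)  # expected: Tests_1 Tests_2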
else: - # switch to aarch64 runner - result += "-aarch64" - - return result - - def get_job_parents(self, check_name: str) -> List[str]: - res = [] - check_name = normalize_string(check_name) - for config in ( - self.build_config, - self.test_configs, - self.other_jobs_configs, - ): - for job_name in config: # type: ignore - if check_name == normalize_string(job_name): - if isinstance(config[job_name], TestConfig): # type: ignore - if config[job_name].required_build: # type: ignore - res.append(config[job_name].required_build) # type: ignore - return res - - def get_digest_config(self, check_name: str) -> DigestConfig: - res = None - for config in ( - self.other_jobs_configs, - self.build_config, - self.builds_report_config, - self.test_configs, - ): - if check_name in config: # type: ignore - res = config[check_name].job_config.digest # type: ignore - assert ( - res - ), f"Invalid check_name or CI_CONFIG outdated, config not found for [{check_name}]" - return res # type: ignore + @classmethod + def get_job_parents(cls, check_name: str) -> List[str]: + return cls.JOB_CONFIGS[check_name].required_builds or [] + @classmethod def get_workflow_jobs_with_configs( - self, is_mq: bool, is_docs_only: bool, is_master: bool + cls, is_mq: bool, is_docs_only: bool, is_master: bool, is_pr: bool ) -> Dict[str, JobConfig]: """ get a list of all jobs for a workflow with configs """ jobs = [] if is_mq: - jobs = self._MQ_JOBS + jobs = MQ_JOBS elif is_docs_only: - jobs = self._DOCS_CHECK_JOBS + jobs = cls._DOCS_CHECK_JOBS else: - for config in ( - self.other_jobs_configs, - self.build_config, - self.builds_report_config, - self.test_configs, - ): - jobs += list(config) # type:ignore + # add all jobs + jobs = list(cls.JOB_CONFIGS) if is_master: - for job in self._MQ_JOBS: + for job in MQ_JOBS: jobs.remove(job) randomization_bucket_jobs = {} # type: Dict[str, Dict[str, JobConfig]] res = {} # type: Dict[str, JobConfig] for job in jobs: - job_config = self.get_job_config(job) + job_config = cls.JOB_CONFIGS[job] - if job_config.random_bucket: + if job_config.random_bucket and is_pr: if job_config.random_bucket not in randomization_bucket_jobs: randomization_bucket_jobs[job_config.random_bucket] = {} randomization_bucket_jobs[job_config.random_bucket][job] = job_config @@ -759,41 +631,20 @@ class CIConfig: return res - def get_builds_for_report( - self, report_name: str, release: bool = False, backport: bool = False - ) -> List[str]: - # hack to modify build list for release and bp wf - assert not (release and backport), "Invalid input" - if backport and report_name == JobNames.BUILD_CHECK: - return [ - Build.PACKAGE_RELEASE, - Build.PACKAGE_AARCH64, - Build.PACKAGE_ASAN, - Build.PACKAGE_TSAN, - Build.PACKAGE_DEBUG, - ] - if (release or backport) and report_name == JobNames.BUILD_CHECK_SPECIAL: - return [ - Build.BINARY_DARWIN, - Build.BINARY_DARWIN_AARCH64, - ] - - return self.builds_report_config[report_name].builds - @classmethod def is_build_job(cls, job: str) -> bool: - return job in Build + return job in cls.BuildNames @classmethod def is_test_job(cls, job: str) -> bool: - return not cls.is_build_job(job) and job != JobNames.STYLE_CHECK + return not cls.is_build_job(job) and job != cls.JobNames.STYLE_CHECK @classmethod def is_docs_job(cls, job: str) -> bool: return job == JobNames.DOCS_CHECK - @staticmethod - def is_required(check_name: str) -> bool: + @classmethod + def is_required(cls, check_name: str) -> bool: """Checks if a check_name is in REQUIRED_CHECKS, including batched jobs""" _BATCH_REGEXP = 
re.compile(r"\s+\[[0-9/]+\]$") if check_name in REQUIRED_CHECKS: @@ -802,810 +653,15 @@ class CIConfig: return check_name[: batch.start()] in REQUIRED_CHECKS return False - def validate(self) -> None: - errors = [] - for name, build_config in self.build_config.items(): - build_in_reports = False - for _, report_config in self.builds_report_config.items(): - if name in report_config.builds: - build_in_reports = True - break - # All build configs must belong to build_report_config - if not build_in_reports: - logging.error("Build name %s does not belong to build reports", name) - errors.append(f"Build name {name} does not belong to build reports") - # The name should be the same as build_config.name - if not build_config.name == name: - logging.error( - "Build name '%s' does not match the config 'name' value '%s'", - name, - build_config.name, - ) - errors.append( - f"Build name {name} does not match 'name' value '{build_config.name}'" - ) - # All build_report_config values should be in build_config.keys() - for build_report_name, build_report_config in self.builds_report_config.items(): - build_names = build_report_config.builds - missed_names = [ - name for name in build_names if name not in self.build_config.keys() - ] - if missed_names: - logging.error( - "The following names of the build report '%s' " - "are missed in build_config: %s", - build_report_name, - missed_names, - ) - errors.append( - f"The following names of the build report '{build_report_name}' " - f"are missed in build_config: {missed_names}", - ) - # And finally, all tests' requirements must be in the builds - for test_name, test_config in self.test_configs.items(): - if test_config.required_build not in self.build_config.keys(): - logging.error( - "The requirement '%s' for '%s' is not found in builds", - test_config, - test_name, - ) - errors.append( - f"The requirement '{test_config}' for " - f"'{test_name}' is not found in builds" - ) - if ( - test_config.required_build - and test_config.required_build - not in self.builds_report_config[JobNames.BUILD_CHECK].builds - ): - errors.append( - f"Test job' required build must be from [{JobNames.BUILD_CHECK}] list" - ) - - if errors: - raise KeyError("config contains errors", errors) + @classmethod + def get_build_config(cls, build_name: str) -> BuildConfig: + assert build_name in cls.JOB_CONFIGS, f"Invalid build name [{build_name}]" + res = cls.JOB_CONFIGS[build_name].build_config + assert res, f"not a build [{build_name}] or invalid JobConfig" + return res -# checks required by Mergeable Check -REQUIRED_CHECKS = [ - "PR Check", - StatusNames.SYNC, - JobNames.BUILD_CHECK, - JobNames.BUILD_CHECK_SPECIAL, - JobNames.DOCS_CHECK, - JobNames.FAST_TEST, - JobNames.STATEFUL_TEST_RELEASE, - JobNames.STATELESS_TEST_RELEASE, - JobNames.STATELESS_TEST_ASAN, - JobNames.STATELESS_TEST_FLAKY_ASAN, - JobNames.STATEFUL_TEST_ASAN, - JobNames.STYLE_CHECK, - JobNames.UNIT_TEST_ASAN, - JobNames.UNIT_TEST_MSAN, - JobNames.UNIT_TEST, - JobNames.UNIT_TEST_TSAN, - JobNames.UNIT_TEST_UBSAN, - JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER, - JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE, -] - -CI_CONFIG = CIConfig( - label_configs={ - CILabels.DO_NOT_TEST_LABEL: LabelConfig(run_jobs=[JobNames.STYLE_CHECK]), - CILabels.CI_SET_ARM: LabelConfig( - run_jobs=[ - JobNames.STYLE_CHECK, - Build.PACKAGE_AARCH64, - JobNames.INTEGRATION_TEST_ARM, - ] - ), - CILabels.CI_SET_REQUIRED: LabelConfig(run_jobs=REQUIRED_CHECKS), - CILabels.CI_SET_NORMAL_BUILDS: LabelConfig( - run_jobs=[ - 
JobNames.STYLE_CHECK, - JobNames.BUILD_CHECK, - Build.PACKAGE_RELEASE, - Build.PACKAGE_AARCH64, - Build.PACKAGE_ASAN, - Build.PACKAGE_UBSAN, - Build.PACKAGE_TSAN, - Build.PACKAGE_MSAN, - Build.PACKAGE_DEBUG, - Build.BINARY_RELEASE, - Build.PACKAGE_RELEASE_COVERAGE, - Build.FUZZERS, - ] - ), - CILabels.CI_SET_SPECIAL_BUILDS: LabelConfig( - run_jobs=[ - JobNames.STYLE_CHECK, - JobNames.BUILD_CHECK_SPECIAL, - Build.BINARY_TIDY, - Build.BINARY_DARWIN, - Build.BINARY_AARCH64, - Build.BINARY_AARCH64_V80COMPAT, - Build.BINARY_FREEBSD, - Build.BINARY_DARWIN_AARCH64, - Build.BINARY_PPC64LE, - Build.BINARY_RISCV64, - Build.BINARY_S390X, - Build.BINARY_LOONGARCH64, - Build.BINARY_AMD64_COMPAT, - Build.BINARY_AMD64_MUSL, - ] - ), - CILabels.CI_SET_NON_REQUIRED: LabelConfig( - run_jobs=[job for job in JobNames if job not in REQUIRED_CHECKS] - ), - CILabels.CI_SET_OLD_ANALYZER: LabelConfig( - run_jobs=[ - JobNames.STYLE_CHECK, - JobNames.FAST_TEST, - Build.PACKAGE_RELEASE, - Build.PACKAGE_ASAN, - JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE, - JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER, - ] - ), - CILabels.CI_SET_SYNC: LabelConfig( - run_jobs=[ - Build.PACKAGE_ASAN, - JobNames.STYLE_CHECK, - JobNames.BUILD_CHECK, - JobNames.UNIT_TEST_ASAN, - JobNames.STATEFUL_TEST_ASAN, - ] - ), - }, - build_config={ - Build.PACKAGE_RELEASE: BuildConfig( - name=Build.PACKAGE_RELEASE, - compiler="clang-18", - package_type="deb", - static_binary_name="amd64", - additional_pkgs=True, - ), - Build.PACKAGE_AARCH64: BuildConfig( - name=Build.PACKAGE_AARCH64, - compiler="clang-18-aarch64", - package_type="deb", - static_binary_name="aarch64", - additional_pkgs=True, - ), - Build.PACKAGE_ASAN: BuildConfig( - name=Build.PACKAGE_ASAN, - compiler="clang-18", - sanitizer="address", - package_type="deb", - ), - Build.PACKAGE_UBSAN: BuildConfig( - name=Build.PACKAGE_UBSAN, - compiler="clang-18", - sanitizer="undefined", - package_type="deb", - ), - Build.PACKAGE_TSAN: BuildConfig( - name=Build.PACKAGE_TSAN, - compiler="clang-18", - sanitizer="thread", - package_type="deb", - ), - Build.PACKAGE_MSAN: BuildConfig( - name=Build.PACKAGE_MSAN, - compiler="clang-18", - sanitizer="memory", - package_type="deb", - ), - Build.PACKAGE_DEBUG: BuildConfig( - name=Build.PACKAGE_DEBUG, - compiler="clang-18", - debug_build=True, - package_type="deb", - sparse_checkout=True, # Check that it works with at least one build, see also update-submodules.sh - ), - Build.PACKAGE_RELEASE_COVERAGE: BuildConfig( - name=Build.PACKAGE_RELEASE_COVERAGE, - compiler="clang-18", - coverage=True, - package_type="deb", - ), - Build.BINARY_RELEASE: BuildConfig( - name=Build.BINARY_RELEASE, - compiler="clang-18", - package_type="binary", - ), - Build.BINARY_TIDY: BuildConfig( - name=Build.BINARY_TIDY, - compiler="clang-18", - debug_build=True, - package_type="binary", - static_binary_name="debug-amd64", - tidy=True, - comment="clang-tidy is used for static analysis", - ), - Build.BINARY_DARWIN: BuildConfig( - name=Build.BINARY_DARWIN, - compiler="clang-18-darwin", - package_type="binary", - static_binary_name="macos", - ), - Build.BINARY_AARCH64: BuildConfig( - name=Build.BINARY_AARCH64, - compiler="clang-18-aarch64", - package_type="binary", - ), - Build.BINARY_AARCH64_V80COMPAT: BuildConfig( - name=Build.BINARY_AARCH64_V80COMPAT, - compiler="clang-18-aarch64-v80compat", - package_type="binary", - static_binary_name="aarch64v80compat", - comment="For ARMv8.1 and older", - ), - Build.BINARY_FREEBSD: BuildConfig( - name=Build.BINARY_FREEBSD, - 
compiler="clang-18-freebsd", - package_type="binary", - static_binary_name="freebsd", - ), - Build.BINARY_DARWIN_AARCH64: BuildConfig( - name=Build.BINARY_DARWIN_AARCH64, - compiler="clang-18-darwin-aarch64", - package_type="binary", - static_binary_name="macos-aarch64", - ), - Build.BINARY_PPC64LE: BuildConfig( - name=Build.BINARY_PPC64LE, - compiler="clang-18-ppc64le", - package_type="binary", - static_binary_name="powerpc64le", - ), - Build.BINARY_AMD64_COMPAT: BuildConfig( - name=Build.BINARY_AMD64_COMPAT, - compiler="clang-18-amd64-compat", - package_type="binary", - static_binary_name="amd64compat", - comment="SSE2-only build", - ), - Build.BINARY_AMD64_MUSL: BuildConfig( - name=Build.BINARY_AMD64_MUSL, - compiler="clang-18-amd64-musl", - package_type="binary", - static_binary_name="amd64musl", - comment="Build with Musl", - ), - Build.BINARY_RISCV64: BuildConfig( - name=Build.BINARY_RISCV64, - compiler="clang-18-riscv64", - package_type="binary", - static_binary_name="riscv64", - ), - Build.BINARY_S390X: BuildConfig( - name=Build.BINARY_S390X, - compiler="clang-18-s390x", - package_type="binary", - static_binary_name="s390x", - ), - Build.BINARY_LOONGARCH64: BuildConfig( - name=Build.BINARY_LOONGARCH64, - compiler="clang-18-loongarch64", - package_type="binary", - static_binary_name="loongarch64", - ), - Build.FUZZERS: BuildConfig( - name=Build.FUZZERS, - compiler="clang-18", - package_type="fuzzers", - job_config=fuzzer_build_job_config, - ), - }, - builds_report_config={ - JobNames.BUILD_CHECK: BuildReportConfig( - builds=[ - Build.PACKAGE_RELEASE, - Build.PACKAGE_AARCH64, - Build.PACKAGE_ASAN, - Build.PACKAGE_UBSAN, - Build.PACKAGE_TSAN, - Build.PACKAGE_MSAN, - Build.PACKAGE_DEBUG, - Build.BINARY_RELEASE, - Build.PACKAGE_RELEASE_COVERAGE, - Build.FUZZERS, - ] - ), - JobNames.BUILD_CHECK_SPECIAL: BuildReportConfig( - builds=[ - Build.BINARY_TIDY, - Build.BINARY_DARWIN, - Build.BINARY_AARCH64, - Build.BINARY_AARCH64_V80COMPAT, - Build.BINARY_FREEBSD, - Build.BINARY_DARWIN_AARCH64, - Build.BINARY_PPC64LE, - Build.BINARY_RISCV64, - Build.BINARY_S390X, - Build.BINARY_LOONGARCH64, - Build.BINARY_AMD64_COMPAT, - Build.BINARY_AMD64_MUSL, - ] - ), - }, - other_jobs_configs={ - JobNames.DOCKER_SERVER: TestConfig("", job_config=docker_server_job_config), - JobNames.DOCKER_KEEPER: TestConfig("", job_config=docker_server_job_config), - JobNames.DOCS_CHECK: TestConfig( - "", - job_config=JobConfig( - digest=DigestConfig( - include_paths=["**/*.md", "./docs", "tests/ci/docs_check.py"], - docker=["clickhouse/docs-builder"], - ), - run_command="docs_check.py", - ), - ), - JobNames.FAST_TEST: TestConfig( - "", - job_config=JobConfig( - pr_only=True, - digest=DigestConfig( - include_paths=["./tests/queries/0_stateless/"], - exclude_files=[".md"], - docker=["clickhouse/fasttest"], - ), - timeout=2400, - ), - ), - JobNames.STYLE_CHECK: TestConfig( - "", - job_config=JobConfig( - run_always=True, - ), - ), - JobNames.BUGFIX_VALIDATE: TestConfig( - "", - # we run this check by label - no digest required - job_config=JobConfig( - run_by_label="pr-bugfix", - run_command="bugfix_validate_check.py", - timeout=900, - ), - ), - }, - test_configs={ - JobNames.INSTALL_TEST_AMD: TestConfig( - Build.PACKAGE_RELEASE, job_config=install_test_params - ), - JobNames.INSTALL_TEST_ARM: TestConfig( - Build.PACKAGE_AARCH64, job_config=install_test_params - ), - JobNames.STATEFUL_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - 
JobNames.STATEFUL_TEST_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_DEBUG: TestConfig( - Build.PACKAGE_DEBUG, job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_RELEASE: TestConfig( - Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_RELEASE_COVERAGE: TestConfig( - Build.PACKAGE_RELEASE_COVERAGE, job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_AARCH64: TestConfig( - Build.PACKAGE_AARCH64, job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - # Stateful tests for parallel replicas - JobNames.STATEFUL_TEST_PARALLEL_REPL_RELEASE: TestConfig( - Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_PARALLEL_REPL_DEBUG: TestConfig( - Build.PACKAGE_DEBUG, job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_PARALLEL_REPL_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_PARALLEL_REPL_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_PARALLEL_REPL_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_PARALLEL_REPL_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore - ), - # End stateful tests for parallel replicas - JobNames.STATELESS_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, - job_config=JobConfig(num_batches=4, **stateless_test_common_params), # type: ignore - ), - JobNames.STATELESS_TEST_TSAN: TestConfig( - Build.PACKAGE_TSAN, - job_config=JobConfig(num_batches=5, **stateless_test_common_params), # type: ignore - ), - JobNames.STATELESS_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, - job_config=JobConfig(num_batches=6, **stateless_test_common_params), # type: ignore - ), - JobNames.STATELESS_TEST_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, - job_config=JobConfig(num_batches=2, **stateless_test_common_params), # type: ignore - ), - JobNames.STATELESS_TEST_DEBUG: TestConfig( - Build.PACKAGE_DEBUG, - job_config=JobConfig(num_batches=5, **stateless_test_common_params), # type: ignore - ), - JobNames.STATELESS_TEST_RELEASE: TestConfig( - Build.PACKAGE_RELEASE, job_config=JobConfig(**stateless_test_common_params) # type: ignore - ), - JobNames.STATELESS_TEST_RELEASE_COVERAGE: TestConfig( - Build.PACKAGE_RELEASE_COVERAGE, - job_config=JobConfig(num_batches=6, **stateless_test_common_params), # type: ignore - ), - JobNames.STATELESS_TEST_AARCH64: TestConfig( - Build.PACKAGE_AARCH64, job_config=JobConfig(**stateless_test_common_params) # type: ignore - ), - JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE: TestConfig( - Build.PACKAGE_RELEASE, - 
job_config=JobConfig(num_batches=4, **stateless_test_common_params), # type: ignore - ), - JobNames.STATELESS_TEST_S3_DEBUG: TestConfig( - Build.PACKAGE_DEBUG, - job_config=JobConfig(num_batches=6, **stateless_test_common_params), # type: ignore - ), - JobNames.STATELESS_TEST_AZURE_ASAN: TestConfig( - Build.PACKAGE_ASAN, - job_config=JobConfig(num_batches=4, **stateless_test_common_params, release_only=True), # type: ignore - ), - JobNames.STATELESS_TEST_S3_TSAN: TestConfig( - Build.PACKAGE_TSAN, - job_config=JobConfig(num_batches=5, **stateless_test_common_params), # type: ignore - ), - JobNames.STRESS_TEST_DEBUG: TestConfig( - Build.PACKAGE_DEBUG, job_config=JobConfig(**stress_test_common_params) # type: ignore - ), - JobNames.STRESS_TEST_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(**stress_test_common_params) # type: ignore - ), - JobNames.STRESS_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore - ), - JobNames.STRESS_TEST_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore - ), - JobNames.STRESS_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore - ), - JobNames.UPGRADE_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore - ), - JobNames.STRESS_TEST_AZURE_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(**stress_test_common_params, release_only=True) # type: ignore - ), - JobNames.STRESS_TEST_AZURE_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(**stress_test_common_params, release_only=True) # type: ignore - ), - JobNames.UPGRADE_TEST_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore - ), - JobNames.UPGRADE_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore - ), - JobNames.UPGRADE_TEST_DEBUG: TestConfig( - Build.PACKAGE_DEBUG, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore - ), - JobNames.INTEGRATION_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, - job_config=JobConfig(num_batches=4, **integration_test_common_params, release_only=True), # type: ignore - ), - JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER: TestConfig( - Build.PACKAGE_ASAN, - job_config=JobConfig(num_batches=6, **integration_test_common_params), # type: ignore - ), - JobNames.INTEGRATION_TEST_TSAN: TestConfig( - Build.PACKAGE_TSAN, - job_config=JobConfig(num_batches=6, **integration_test_common_params), # type: ignore - ), - JobNames.INTEGRATION_TEST_ARM: TestConfig( - Build.PACKAGE_AARCH64, - job_config=JobConfig(num_batches=6, **integration_test_common_params), # type: ignore - ), - JobNames.INTEGRATION_TEST: TestConfig( - Build.PACKAGE_RELEASE, - job_config=JobConfig(num_batches=4, **integration_test_common_params, release_only=True), # type: ignore - ), - JobNames.INTEGRATION_TEST_FLAKY: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(pr_only=True, **integration_test_common_params) # type: ignore - ), - JobNames.COMPATIBILITY_TEST: TestConfig( - Build.PACKAGE_RELEASE, - job_config=JobConfig( - required_on_release_branch=True, 
**compatibility_test_common_params # type: ignore - ), - ), - JobNames.COMPATIBILITY_TEST_ARM: TestConfig( - Build.PACKAGE_AARCH64, - job_config=JobConfig( - required_on_release_branch=True, **compatibility_test_common_params # type: ignore - ), - ), - JobNames.UNIT_TEST: TestConfig( - Build.BINARY_RELEASE, job_config=JobConfig(**unit_test_common_params) # type: ignore - ), - JobNames.UNIT_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(**unit_test_common_params) # type: ignore - ), - JobNames.UNIT_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(**unit_test_common_params) # type: ignore - ), - JobNames.UNIT_TEST_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(**unit_test_common_params) # type: ignore - ), - JobNames.UNIT_TEST_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, job_config=JobConfig(**unit_test_common_params) # type: ignore - ), - JobNames.AST_FUZZER_TEST_DEBUG: TestConfig( - Build.PACKAGE_DEBUG, job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore - ), - JobNames.AST_FUZZER_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore - ), - JobNames.AST_FUZZER_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore - ), - JobNames.AST_FUZZER_TEST_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore - ), - JobNames.AST_FUZZER_TEST_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore - ), - JobNames.STATELESS_TEST_FLAKY_ASAN: TestConfig( - # replace to non-default - Build.PACKAGE_ASAN, - job_config=JobConfig(pr_only=True, **{**stateless_test_common_params, "timeout": 3600}), # type: ignore - ), - JobNames.JEPSEN_KEEPER: TestConfig( - Build.BINARY_RELEASE, - job_config=JobConfig( - run_by_label="jepsen-test", run_command="jepsen_check.py keeper" - ), - ), - JobNames.JEPSEN_SERVER: TestConfig( - Build.BINARY_RELEASE, - job_config=JobConfig( - run_by_label="jepsen-test", run_command="jepsen_check.py server" - ), - ), - JobNames.PERFORMANCE_TEST_AMD64: TestConfig( - Build.PACKAGE_RELEASE, - job_config=JobConfig(num_batches=4, **perf_test_common_params), # type: ignore - ), - JobNames.PERFORMANCE_TEST_ARM64: TestConfig( - Build.PACKAGE_AARCH64, - job_config=JobConfig(num_batches=4, run_by_label="pr-performance", **perf_test_common_params), # type: ignore - ), - JobNames.SQLANCER: TestConfig( - Build.PACKAGE_RELEASE, job_config=sqllancer_test_common_params - ), - JobNames.SQLANCER_DEBUG: TestConfig( - Build.PACKAGE_DEBUG, job_config=sqllancer_test_common_params - ), - JobNames.SQL_LOGIC_TEST: TestConfig( - Build.PACKAGE_RELEASE, job_config=sqllogic_test_params - ), - JobNames.SQLTEST: TestConfig(Build.PACKAGE_RELEASE, job_config=sql_test_params), - JobNames.CLICKBENCH_TEST: TestConfig( - Build.PACKAGE_RELEASE, job_config=JobConfig(**clickbench_test_params) # type: ignore - ), - JobNames.CLICKBENCH_TEST_ARM: TestConfig( - Build.PACKAGE_AARCH64, job_config=JobConfig(**clickbench_test_params) # type: ignore - ), - JobNames.LIBFUZZER_TEST: TestConfig( - Build.FUZZERS, - job_config=JobConfig( - run_by_label=CILabels.libFuzzer, - timeout=10800, - run_command='libfuzzer_test_check.py "$CHECK_NAME"', - ), - ), # type: ignore - }, -) -CI_CONFIG.validate() - - -@dataclass -class CheckDescription: - name: str - description: str # the check descriptions, will be put into the status table - match_func: Callable[[str], bool] 
# the function to check vs the commit status - - def __hash__(self) -> int: - return hash(self.name + self.description) - - -CHECK_DESCRIPTIONS = [ - CheckDescription( - "PR Check", - "Checks correctness of the PR's body", - lambda x: x == "PR Check", - ), - CheckDescription( - StatusNames.SYNC, - "If it fails, ask a maintainer for help", - lambda x: x == StatusNames.SYNC, - ), - CheckDescription( - "AST fuzzer", - "Runs randomly generated queries to catch program errors. " - "The build type is optionally given in parenthesis. " - "If it fails, ask a maintainer for help", - lambda x: x.startswith("AST fuzzer"), - ), - CheckDescription( - JobNames.BUGFIX_VALIDATE, - "Checks that either a new test (functional or integration) or there " - "some changed tests that fail with the binary built on master branch", - lambda x: x == JobNames.BUGFIX_VALIDATE, - ), - CheckDescription( - "CI running", - "A meta-check that indicates the running CI. Normally, it's in success or " - "pending state. The failed status indicates some problems with the PR", - lambda x: x == "CI running", - ), - CheckDescription( - "ClickHouse build check", - "Builds ClickHouse in various configurations for use in further steps. " - "You have to fix the builds that fail. Build logs often has enough " - "information to fix the error, but you might have to reproduce the failure " - "locally. The cmake options can be found in the build log, grepping for " - 'cmake. Use these options and follow the general build process', - lambda x: x.startswith("ClickHouse") and x.endswith("build check"), - ), - CheckDescription( - "Compatibility check", - "Checks that clickhouse binary runs on distributions with old libc " - "versions. If it fails, ask a maintainer for help", - lambda x: x.startswith("Compatibility check"), - ), - CheckDescription( - JobNames.DOCKER_SERVER, - "The check to build and optionally push the mentioned image to docker hub", - lambda x: x.startswith("Docker server"), - ), - CheckDescription( - JobNames.DOCKER_KEEPER, - "The check to build and optionally push the mentioned image to docker hub", - lambda x: x.startswith("Docker keeper"), - ), - CheckDescription( - JobNames.DOCS_CHECK, - "Builds and tests the documentation", - lambda x: x == JobNames.DOCS_CHECK, - ), - CheckDescription( - JobNames.FAST_TEST, - "Normally this is the first check that is ran for a PR. It builds ClickHouse " - 'and runs most of stateless functional tests, ' - "omitting some. If it fails, further checks are not started until it is fixed. " - "Look at the report to see which tests fail, then reproduce the failure " - 'locally as described here', - lambda x: x == JobNames.FAST_TEST, - ), - CheckDescription( - "Flaky tests", - "Checks if new added or modified tests are flaky by running them repeatedly, " - "in parallel, with more randomization. Functional tests are run 100 times " - "with address sanitizer, and additional randomization of thread scheduling. " - "Integration tests are run up to 10 times. If at least once a new test has " - "failed, or was too long, this check will be red. We don't allow flaky tests, " - 'read the doc', - lambda x: "tests flaky check" in x, - ), - CheckDescription( - "Install packages", - "Checks that the built packages are installable in a clear environment", - lambda x: x.startswith("Install packages ("), - ), - CheckDescription( - "Integration tests", - "The integration tests report. 
In parenthesis the package type is given, " - "and in square brackets are the optional part/total tests", - lambda x: x.startswith("Integration tests ("), - ), - CheckDescription( - StatusNames.MERGEABLE, - "Checks if all other necessary checks are successful", - lambda x: x == StatusNames.MERGEABLE, - ), - CheckDescription( - "Performance Comparison", - "Measure changes in query performance. The performance test report is " - 'described in detail here. ' - "In square brackets are the optional part/total tests", - lambda x: x.startswith("Performance Comparison"), - ), - CheckDescription( - "Push to Dockerhub", - "The check for building and pushing the CI related docker images to docker hub", - lambda x: x.startswith("Push") and "to Dockerhub" in x, - ), - CheckDescription( - "Sqllogic", - "Run clickhouse on the " - 'sqllogic ' - "test set against sqlite and checks that all statements are passed", - lambda x: x.startswith("Sqllogic test"), - ), - CheckDescription( - "SQLancer", - "Fuzzing tests that detect logical bugs with " - 'SQLancer tool', - lambda x: x.startswith("SQLancer"), - ), - CheckDescription( - "Stateful tests", - "Runs stateful functional tests for ClickHouse binaries built in various " - "configurations -- release, debug, with sanitizers, etc", - lambda x: x.startswith("Stateful tests ("), - ), - CheckDescription( - "Stateless tests", - "Runs stateless functional tests for ClickHouse binaries built in various " - "configurations -- release, debug, with sanitizers, etc", - lambda x: x.startswith("Stateless tests ("), - ), - CheckDescription( - "Stress test", - "Runs stateless functional tests concurrently from several clients to detect " - "concurrency-related errors", - lambda x: x.startswith("Stress test ("), - ), - CheckDescription( - JobNames.STYLE_CHECK, - "Runs a set of checks to keep the code style clean. If some of tests failed, " - "see the related log from the report", - lambda x: x == JobNames.STYLE_CHECK, - ), - CheckDescription( - "Unit tests", - "Runs the unit tests for different release types", - lambda x: x.startswith("Unit tests ("), - ), - CheckDescription( - "Upgrade check", - "Runs stress tests on server version from last release and then tries to " - "upgrade it to the version from the PR. 
It checks if the new server can " - "successfully startup without any errors, crashes or sanitizer asserts", - lambda x: x.startswith("Upgrade check ("), - ), - CheckDescription( - "ClickBench", - "Runs [ClickBench](https://github.com/ClickHouse/ClickBench/) with instant-attach table", - lambda x: x.startswith("ClickBench"), - ), - CheckDescription( - "Fallback for unknown", - "There's no description for the check yet, please add it to " - "tests/ci/ci_config.py:CHECK_DESCRIPTIONS", - lambda x: True, - ), -] - - -def main() -> None: +if __name__ == "__main__": parser = ArgumentParser( formatter_class=ArgumentDefaultsHelpFormatter, description="The script provides build config for GITHUB_ENV or shell export", @@ -1617,10 +673,9 @@ def main() -> None: help="if set, the ENV parameters are provided for shell export", ) args = parser.parse_args() - build_config = CI_CONFIG.build_config.get(args.build_name) - if build_config: - print(build_config.export_env(args.export)) - - -if __name__ == "__main__": - main() + assert ( + args.build_name in CI.JOB_CONFIGS + ), f"Build name [{args.build_name}] is not valid" + build_config = CI.JOB_CONFIGS[args.build_name].build_config + assert build_config, "--export must not be used for non-build jobs" + print(build_config.export_env(args.export)) diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py new file mode 100644 index 00000000000..48e1280d939 --- /dev/null +++ b/tests/ci/ci_definitions.py @@ -0,0 +1,785 @@ +import copy +from dataclasses import dataclass, field +from pathlib import Path +from typing import Callable, List, Union, Iterable, Optional, Literal, Any + +from ci_utils import WithIter +from integration_test_images import IMAGES + + +class WorkflowStages(metaclass=WithIter): + """ + Stages of GitHUb actions workflow + """ + + # for jobs that do not belong to any stage, e.g. Build Report Check + NA = "UNKNOWN" + # normal builds (builds that required for further testing) + BUILDS_1 = "Builds_1" + # special builds + BUILDS_2 = "Builds_2" + # all tests required for merge + TESTS_1 = "Tests_1" + # not used atm + TESTS_2 = "Tests_2" + # all tests not required for merge + TESTS_3 = "Tests_3" + + +class Runners(metaclass=WithIter): + """ + GitHub runner's labels + """ + + BUILDER = "builder" + STYLE_CHECKER = "style-checker" + STYLE_CHECKER_ARM = "style-checker-aarch64" + FUNC_TESTER = "func-tester" + FUNC_TESTER_ARM = "func-tester-aarch64" + STRESS_TESTER = "stress-tester" + FUZZER_UNIT_TESTER = "fuzzer-unit-tester" + + +class Tags(metaclass=WithIter): + """ + CI Customization tags (set via PR body or some of them in GH labels, e.g. 
libFuzzer) + """ + + DO_NOT_TEST_LABEL = "do_not_test" + WOOLEN_WOLFDOG_LABEL = "woolen_wolfdog" + NO_MERGE_COMMIT = "no_merge_commit" + NO_CI_CACHE = "no_ci_cache" + # to upload all binaries from build jobs + UPLOAD_ALL_ARTIFACTS = "upload_all" + CI_SET_SYNC = "ci_set_sync" + CI_SET_ARM = "ci_set_arm" + CI_SET_REQUIRED = "ci_set_required" + CI_SET_BUILDS = "ci_set_builds" + CI_SET_NON_REQUIRED = "ci_set_non_required" + CI_SET_OLD_ANALYZER = "ci_set_old_analyzer" + + libFuzzer = "libFuzzer" + + +class BuildNames(metaclass=WithIter): + """ + Build' job names + """ + + PACKAGE_RELEASE = "package_release" + PACKAGE_AARCH64 = "package_aarch64" + PACKAGE_ASAN = "package_asan" + PACKAGE_UBSAN = "package_ubsan" + PACKAGE_TSAN = "package_tsan" + PACKAGE_MSAN = "package_msan" + PACKAGE_DEBUG = "package_debug" + PACKAGE_RELEASE_COVERAGE = "package_release_coverage" + BINARY_RELEASE = "binary_release" + BINARY_TIDY = "binary_tidy" + BINARY_DARWIN = "binary_darwin" + BINARY_AARCH64 = "binary_aarch64" + BINARY_AARCH64_V80COMPAT = "binary_aarch64_v80compat" + BINARY_FREEBSD = "binary_freebsd" + BINARY_DARWIN_AARCH64 = "binary_darwin_aarch64" + BINARY_PPC64LE = "binary_ppc64le" + BINARY_AMD64_COMPAT = "binary_amd64_compat" + BINARY_AMD64_MUSL = "binary_amd64_musl" + BINARY_RISCV64 = "binary_riscv64" + BINARY_S390X = "binary_s390x" + BINARY_LOONGARCH64 = "binary_loongarch64" + FUZZERS = "fuzzers" + + +class JobNames(metaclass=WithIter): + """ + All CI non-build jobs (Build jobs are concatenated to this list via python hack) + """ + + STYLE_CHECK = "Style check" + FAST_TEST = "Fast test" + DOCKER_SERVER = "Docker server image" + DOCKER_KEEPER = "Docker keeper image" + INSTALL_TEST_AMD = "Install packages (release)" + INSTALL_TEST_ARM = "Install packages (aarch64)" + + STATELESS_TEST_DEBUG = "Stateless tests (debug)" + STATELESS_TEST_RELEASE = "Stateless tests (release)" + STATELESS_TEST_RELEASE_COVERAGE = "Stateless tests (coverage)" + STATELESS_TEST_AARCH64 = "Stateless tests (aarch64)" + STATELESS_TEST_ASAN = "Stateless tests (asan)" + STATELESS_TEST_TSAN = "Stateless tests (tsan)" + STATELESS_TEST_MSAN = "Stateless tests (msan)" + STATELESS_TEST_UBSAN = "Stateless tests (ubsan)" + STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE = ( + "Stateless tests (release, old analyzer, s3, DatabaseReplicated)" + ) + STATELESS_TEST_S3_DEBUG = "Stateless tests (debug, s3 storage)" + STATELESS_TEST_S3_TSAN = "Stateless tests (tsan, s3 storage)" + STATELESS_TEST_AZURE_ASAN = "Stateless tests (azure, asan)" + STATELESS_TEST_FLAKY_ASAN = "Stateless tests flaky check (asan)" + + STATEFUL_TEST_DEBUG = "Stateful tests (debug)" + STATEFUL_TEST_RELEASE = "Stateful tests (release)" + STATEFUL_TEST_RELEASE_COVERAGE = "Stateful tests (coverage)" + STATEFUL_TEST_AARCH64 = "Stateful tests (aarch64)" + STATEFUL_TEST_ASAN = "Stateful tests (asan)" + STATEFUL_TEST_TSAN = "Stateful tests (tsan)" + STATEFUL_TEST_MSAN = "Stateful tests (msan)" + STATEFUL_TEST_UBSAN = "Stateful tests (ubsan)" + STATEFUL_TEST_PARALLEL_REPL_RELEASE = "Stateful tests (release, ParallelReplicas)" + STATEFUL_TEST_PARALLEL_REPL_DEBUG = "Stateful tests (debug, ParallelReplicas)" + STATEFUL_TEST_PARALLEL_REPL_ASAN = "Stateful tests (asan, ParallelReplicas)" + STATEFUL_TEST_PARALLEL_REPL_MSAN = "Stateful tests (msan, ParallelReplicas)" + STATEFUL_TEST_PARALLEL_REPL_UBSAN = "Stateful tests (ubsan, ParallelReplicas)" + STATEFUL_TEST_PARALLEL_REPL_TSAN = "Stateful tests (tsan, ParallelReplicas)" + + STRESS_TEST_ASAN = "Stress test (asan)" + STRESS_TEST_TSAN = 
"Stress test (tsan)" + STRESS_TEST_UBSAN = "Stress test (ubsan)" + STRESS_TEST_MSAN = "Stress test (msan)" + STRESS_TEST_DEBUG = "Stress test (debug)" + STRESS_TEST_AZURE_TSAN = "Stress test (azure, tsan)" + STRESS_TEST_AZURE_MSAN = "Stress test (azure, msan)" + + INTEGRATION_TEST = "Integration tests (release)" + INTEGRATION_TEST_ASAN = "Integration tests (asan)" + INTEGRATION_TEST_ASAN_OLD_ANALYZER = "Integration tests (asan, old analyzer)" + INTEGRATION_TEST_TSAN = "Integration tests (tsan)" + INTEGRATION_TEST_ARM = "Integration tests (aarch64)" + INTEGRATION_TEST_FLAKY = "Integration tests flaky check (asan)" + + UPGRADE_TEST_DEBUG = "Upgrade check (debug)" + UPGRADE_TEST_ASAN = "Upgrade check (asan)" + UPGRADE_TEST_TSAN = "Upgrade check (tsan)" + UPGRADE_TEST_MSAN = "Upgrade check (msan)" + + UNIT_TEST = "Unit tests (release)" + UNIT_TEST_ASAN = "Unit tests (asan)" + UNIT_TEST_MSAN = "Unit tests (msan)" + UNIT_TEST_TSAN = "Unit tests (tsan)" + UNIT_TEST_UBSAN = "Unit tests (ubsan)" + + AST_FUZZER_TEST_DEBUG = "AST fuzzer (debug)" + AST_FUZZER_TEST_ASAN = "AST fuzzer (asan)" + AST_FUZZER_TEST_MSAN = "AST fuzzer (msan)" + AST_FUZZER_TEST_TSAN = "AST fuzzer (tsan)" + AST_FUZZER_TEST_UBSAN = "AST fuzzer (ubsan)" + + JEPSEN_KEEPER = "ClickHouse Keeper Jepsen" + JEPSEN_SERVER = "ClickHouse Server Jepsen" + + PERFORMANCE_TEST_AMD64 = "Performance Comparison (release)" + PERFORMANCE_TEST_ARM64 = "Performance Comparison (aarch64)" + + SQL_LOGIC_TEST = "Sqllogic test (release)" + + SQLANCER = "SQLancer (release)" + SQLANCER_DEBUG = "SQLancer (debug)" + SQLTEST = "SQLTest" + + COMPATIBILITY_TEST = "Compatibility check (release)" + COMPATIBILITY_TEST_ARM = "Compatibility check (aarch64)" + + CLICKBENCH_TEST = "ClickBench (release)" + CLICKBENCH_TEST_ARM = "ClickBench (aarch64)" + + LIBFUZZER_TEST = "libFuzzer tests" + + BUILD_CHECK = "Builds" + + DOCS_CHECK = "Docs check" + BUGFIX_VALIDATE = "Bugfix validation" + + +# hack to concatenate Build and non-build jobs under JobNames class +for attr_name in dir(BuildNames): + if not attr_name.startswith("__") and not callable(getattr(BuildNames, attr_name)): + setattr(JobNames, attr_name, getattr(BuildNames, attr_name)) + + +class StatusNames(metaclass=WithIter): + """ + Class with statuses that aren't related to particular jobs + """ + + # overall CI report + CI = "CI running" + # mergeable status + MERGEABLE = "Mergeable Check" + # status of a sync pr + SYNC = "Cloud fork sync (only for ClickHouse Inc. 
employees)" + # PR formatting check status + PR_CHECK = "PR Check" + + +class SyncState(metaclass=WithIter): + PENDING = "awaiting sync" + # temporary state if GH does not know mergeable state + MERGE_UNKNOWN = "unknown state (might be auto recoverable)" + # changes cannot be pushed/merged to a sync branch + PUSH_FAILED = "push failed" + MERGE_CONFLICTS = "merge conflicts" + TESTING = "awaiting test results" + TESTS_FAILED = "tests failed" + COMPLETED = "completed" + + +@dataclass +class DigestConfig: + # all files, dirs to include into digest, glob supported + include_paths: List[Union[str, Path]] = field(default_factory=list) + # file suffixes to exclude from digest + exclude_files: List[str] = field(default_factory=list) + # directories to exclude from digest + exclude_dirs: List[Union[str, Path]] = field(default_factory=list) + # docker names to include into digest + docker: List[str] = field(default_factory=list) + # git submodules digest + git_submodules: bool = False + + +@dataclass +class LabelConfig: + """ + configures different CI scenarios per CI Tag/GH label + """ + + run_jobs: Iterable[str] = frozenset() + + +@dataclass +class BuildConfig: + name: str + compiler: str + package_type: Literal["deb", "binary", "fuzzers"] + additional_pkgs: bool = False + debug_build: bool = False + coverage: bool = False + sanitizer: str = "" + tidy: bool = False + # sparse_checkout is needed only to test the option itself. + # No particular sense to use it in every build, since it slows down the job. + sparse_checkout: bool = False + comment: str = "" + static_binary_name: str = "" + + def export_env(self, export: bool = False) -> str: + def process(field_name: str, field: Union[bool, str]) -> str: + if isinstance(field, bool): + field = str(field).lower() + elif not isinstance(field, str): + field = "" + if export: + return f"export BUILD_{field_name.upper()}={repr(field)}" + return f"BUILD_{field_name.upper()}={field}" + + return "\n".join(process(k, v) for k, v in self.__dict__.items()) + + +@dataclass +class JobConfig: + """ + contains config parameters for job execution in CI workflow + """ + + # GH Runner type (tag from @Runners) + runner_type: str + # used for config validation in ci unittests + job_name_keyword: str = "" + # builds required for the job (applicable for test jobs) + required_builds: Optional[List[str]] = None + # build config for the build job (applicable for builds) + build_config: Optional[BuildConfig] = None + # configures digest calculation for the job + digest: DigestConfig = field(default_factory=DigestConfig) + # will be triggered for the job if omitted in CI workflow yml + run_command: str = "" + # job timeout, seconds + timeout: Optional[int] = None + # sets number of batches for a multi-batch job + num_batches: int = 1 + # label that enables job in CI, if set digest isn't used + run_by_label: str = "" + # to run always regardless of the job digest or/and label + run_always: bool = False + # if the job needs to be run on the release branch, including master (building packages, docker server). + # NOTE: Subsequent runs on the same branch with the similar digest are still considered skip-able. + required_on_release_branch: bool = False + # job is for pr workflow only + pr_only: bool = False + # job is for release/master branches only + release_only: bool = False + # to randomly pick and run one job among jobs in the same @random_bucket (PR branches only). + random_bucket: str = "" + # Do not set it. A list of batches to run. 
It will be set in runtime in accordance with ci cache and ci settings + batches: Optional[List[int]] = None + # Do not set it. A list of batches to await. It will be set in runtime in accordance with ci cache and ci settings + pending_batches: Optional[List[int]] = None + + def with_properties(self, **kwargs: Any) -> "JobConfig": + res = copy.deepcopy(self) + for k, v in kwargs.items(): + assert hasattr(self, k), f"Setting invalid attribute [{k}]" + setattr(res, k, v) + return res + + def get_required_build(self) -> str: + assert self.required_builds + return self.required_builds[0] + + +class CommonJobConfigs: + """ + Common job configs + """ + + BUILD_REPORT = JobConfig( + job_name_keyword="builds", + run_command="build_report_check.py", + digest=DigestConfig( + include_paths=[ + "./tests/ci/build_report_check.py", + "./tests/ci/upload_result_helper.py", + ], + ), + runner_type=Runners.STYLE_CHECKER_ARM, + ) + COMPATIBILITY_TEST = JobConfig( + job_name_keyword="compatibility", + digest=DigestConfig( + include_paths=["./tests/ci/compatibility_check.py"], + docker=["clickhouse/test-old-ubuntu", "clickhouse/test-old-centos"], + ), + run_command="compatibility_check.py", + runner_type=Runners.STYLE_CHECKER, + ) + INSTALL_TEST = JobConfig( + job_name_keyword="install", + digest=DigestConfig( + include_paths=["./tests/ci/install_check.py"], + docker=["clickhouse/install-deb-test", "clickhouse/install-rpm-test"], + ), + run_command='install_check.py "$CHECK_NAME"', + runner_type=Runners.STYLE_CHECKER, + timeout=900, + ) + STATELESS_TEST = JobConfig( + job_name_keyword="stateless", + digest=DigestConfig( + include_paths=[ + "./tests/ci/functional_test_check.py", + "./tests/queries/0_stateless/", + "./tests/clickhouse-test", + "./tests/config", + "./tests/*.txt", + ], + exclude_files=[".md"], + docker=["clickhouse/stateless-test"], + ), + run_command='functional_test_check.py "$CHECK_NAME"', + runner_type=Runners.FUNC_TESTER, + timeout=10800, + ) + STATEFUL_TEST = JobConfig( + job_name_keyword="stateful", + digest=DigestConfig( + include_paths=[ + "./tests/ci/functional_test_check.py", + "./tests/queries/1_stateful/", + "./tests/clickhouse-test", + "./tests/config", + "./tests/*.txt", + ], + exclude_files=[".md"], + docker=["clickhouse/stateful-test"], + ), + run_command='functional_test_check.py "$CHECK_NAME"', + runner_type=Runners.FUNC_TESTER, + timeout=3600, + ) + STRESS_TEST = JobConfig( + job_name_keyword="stress", + digest=DigestConfig( + include_paths=[ + "./tests/queries/0_stateless/", + "./tests/queries/1_stateful/", + "./tests/clickhouse-test", + "./tests/config", + "./tests/*.txt", + ], + exclude_files=[".md"], + docker=["clickhouse/stress-test"], + ), + run_command="stress_check.py", + runner_type=Runners.STRESS_TESTER, + timeout=9000, + ) + UPGRADE_TEST = JobConfig( + job_name_keyword="upgrade", + digest=DigestConfig( + include_paths=["./tests/ci/upgrade_check.py"], + exclude_files=[".md"], + docker=["clickhouse/upgrade-check"], + ), + run_command="upgrade_check.py", + runner_type=Runners.STRESS_TESTER, + ) + INTEGRATION_TEST = JobConfig( + job_name_keyword="integration", + digest=DigestConfig( + include_paths=[ + "./tests/ci/integration_test_check.py", + "./tests/ci/integration_tests_runner.py", + "./tests/integration/", + ], + exclude_files=[".md"], + docker=IMAGES.copy(), + ), + run_command='integration_test_check.py "$CHECK_NAME"', + runner_type=Runners.STRESS_TESTER, + ) + ASTFUZZER_TEST = JobConfig( + job_name_keyword="ast", + digest=DigestConfig(), + 
run_command="ast_fuzzer_check.py", + run_always=True, + runner_type=Runners.FUZZER_UNIT_TESTER, + ) + UNIT_TEST = JobConfig( + job_name_keyword="unit", + digest=DigestConfig( + include_paths=["./tests/ci/unit_tests_check.py"], + exclude_files=[".md"], + docker=["clickhouse/unit-test"], + ), + run_command="unit_tests_check.py", + runner_type=Runners.FUZZER_UNIT_TESTER, + ) + PERF_TESTS = JobConfig( + job_name_keyword="performance", + digest=DigestConfig( + include_paths=[ + "./tests/ci/performance_comparison_check.py", + "./tests/performance/", + ], + exclude_files=[".md"], + docker=["clickhouse/performance-comparison"], + ), + run_command="performance_comparison_check.py", + runner_type=Runners.STRESS_TESTER, + ) + SQLLANCER_TEST = JobConfig( + job_name_keyword="lancer", + digest=DigestConfig(), + run_command="sqlancer_check.py", + release_only=True, + run_always=True, + runner_type=Runners.FUZZER_UNIT_TESTER, + ) + SQLLOGIC_TEST = JobConfig( + job_name_keyword="logic", + digest=DigestConfig( + include_paths=["./tests/ci/sqllogic_test.py"], + exclude_files=[".md"], + docker=["clickhouse/sqllogic-test"], + ), + run_command="sqllogic_test.py", + timeout=10800, + release_only=True, + runner_type=Runners.STYLE_CHECKER, + ) + SQL_TEST = JobConfig( + job_name_keyword="sqltest", + digest=DigestConfig( + include_paths=["./tests/ci/sqltest.py"], + exclude_files=[".md"], + docker=["clickhouse/sqltest"], + ), + run_command="sqltest.py", + timeout=10800, + release_only=True, + runner_type=Runners.FUZZER_UNIT_TESTER, + ) + BUGFIX_TEST = JobConfig( + job_name_keyword="bugfix", + digest=DigestConfig(), + run_command="bugfix_validate_check.py", + timeout=900, + runner_type=Runners.FUNC_TESTER, + ) + DOCKER_SERVER = JobConfig( + job_name_keyword="docker", + required_on_release_branch=True, + run_command='docker_server.py --check-name "$CHECK_NAME" --release-type head --allow-build-reuse', + digest=DigestConfig( + include_paths=[ + "tests/ci/docker_server.py", + "./docker/server", + ] + ), + runner_type=Runners.STYLE_CHECKER, + ) + CLICKBENCH_TEST = JobConfig( + job_name_keyword="clickbench", + digest=DigestConfig( + include_paths=[ + "tests/ci/clickbench.py", + ], + docker=["clickhouse/clickbench"], + ), + run_command='clickbench.py "$CHECK_NAME"', + timeout=900, + runner_type=Runners.FUNC_TESTER, + ) + BUILD = JobConfig( + required_on_release_branch=True, + digest=DigestConfig( + include_paths=[ + "./src", + "./contrib/*-cmake", + "./contrib/consistent-hashing", + "./contrib/murmurhash", + "./contrib/libfarmhash", + "./contrib/pdqsort", + "./contrib/cityhash102", + "./contrib/sparse-checkout", + "./contrib/libmetrohash", + "./contrib/update-submodules.sh", + "./contrib/CMakeLists.txt", + "./CMakeLists.txt", + "./PreLoad.cmake", + "./cmake", + "./base", + "./programs", + "./packages", + "./docker/packager/packager", + "./rust", + "./tests/ci/version_helper.py", + # FIXME: This is a WA to rebuild the CH and recreate the Performance.tar.zst artifact + # when there are changes in performance test scripts. 
+ # Due to the current design of the perf test we need to rebuild CH when the performance test changes, + # otherwise the changes will not be visible in the PerformanceTest job in CI + "./tests/performance", + ], + exclude_files=[".md"], + docker=["clickhouse/binary-builder"], + git_submodules=True, + ), + run_command="build_check.py $BUILD_NAME", + runner_type=Runners.BUILDER, + ) + + +REQUIRED_CHECKS = [ + StatusNames.PR_CHECK, + StatusNames.SYNC, + JobNames.BUILD_CHECK, + JobNames.DOCS_CHECK, + JobNames.FAST_TEST, + JobNames.STATEFUL_TEST_RELEASE, + JobNames.STATELESS_TEST_RELEASE, + JobNames.STATELESS_TEST_ASAN, + JobNames.STATELESS_TEST_FLAKY_ASAN, + JobNames.STATEFUL_TEST_ASAN, + JobNames.STYLE_CHECK, + JobNames.UNIT_TEST_ASAN, + JobNames.UNIT_TEST_MSAN, + JobNames.UNIT_TEST, + JobNames.UNIT_TEST_TSAN, + JobNames.UNIT_TEST_UBSAN, + JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER, + JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE, +] + +# Jobs that run in Merge Queue if it's enabled +MQ_JOBS = [ + JobNames.STYLE_CHECK, + JobNames.FAST_TEST, + BuildNames.BINARY_RELEASE, + JobNames.UNIT_TEST, +] + + +@dataclass +class CheckDescription: + name: str + description: str # the check descriptions, will be put into the status table + match_func: Callable[[str], bool] # the function to check vs the commit status + + def __hash__(self) -> int: + return hash(self.name + self.description) + + +CHECK_DESCRIPTIONS = [ + CheckDescription( + StatusNames.PR_CHECK, + "Checks correctness of the PR's body", + lambda x: x == "PR Check", + ), + CheckDescription( + StatusNames.SYNC, + "If it fails, ask a maintainer for help", + lambda x: x == StatusNames.SYNC, + ), + CheckDescription( + "AST fuzzer", + "Runs randomly generated queries to catch program errors. " + "The build type is optionally given in parenthesis. " + "If it fails, ask a maintainer for help", + lambda x: x.startswith("AST fuzzer"), + ), + CheckDescription( + JobNames.BUGFIX_VALIDATE, + "Checks that either a new test (functional or integration) or there " + "some changed tests that fail with the binary built on master branch", + lambda x: x == JobNames.BUGFIX_VALIDATE, + ), + CheckDescription( + StatusNames.CI, + "A meta-check that indicates the running CI. Normally, it's in success or " + "pending state. The failed status indicates some problems with the PR", + lambda x: x == "CI running", + ), + CheckDescription( + "Builds", + "Builds ClickHouse in various configurations for use in further steps. " + "You have to fix the builds that fail. Build logs often has enough " + "information to fix the error, but you might have to reproduce the failure " + "locally. The cmake options can be found in the build log, grepping for " + 'cmake. Use these options and follow the general build process', + lambda x: x.startswith("ClickHouse") and x.endswith("build check"), + ), + CheckDescription( + "Compatibility check", + "Checks that clickhouse binary runs on distributions with old libc " + "versions. 
If it fails, ask a maintainer for help", + lambda x: x.startswith("Compatibility check"), + ), + CheckDescription( + JobNames.DOCKER_SERVER, + "The check to build and optionally push the mentioned image to docker hub", + lambda x: x.startswith("Docker server"), + ), + CheckDescription( + JobNames.DOCKER_KEEPER, + "The check to build and optionally push the mentioned image to docker hub", + lambda x: x.startswith("Docker keeper"), + ), + CheckDescription( + JobNames.DOCS_CHECK, + "Builds and tests the documentation", + lambda x: x == JobNames.DOCS_CHECK, + ), + CheckDescription( + JobNames.FAST_TEST, + "Normally this is the first check that is ran for a PR. It builds ClickHouse " + 'and runs most of stateless functional tests, ' + "omitting some. If it fails, further checks are not started until it is fixed. " + "Look at the report to see which tests fail, then reproduce the failure " + 'locally as described here', + lambda x: x == JobNames.FAST_TEST, + ), + CheckDescription( + "Flaky tests", + "Checks if new added or modified tests are flaky by running them repeatedly, " + "in parallel, with more randomization. Functional tests are run 100 times " + "with address sanitizer, and additional randomization of thread scheduling. " + "Integration tests are run up to 10 times. If at least once a new test has " + "failed, or was too long, this check will be red. We don't allow flaky tests, " + 'read the doc', + lambda x: "tests flaky check" in x, + ), + CheckDescription( + "Install packages", + "Checks that the built packages are installable in a clear environment", + lambda x: x.startswith("Install packages ("), + ), + CheckDescription( + "Integration tests", + "The integration tests report. In parenthesis the package type is given, " + "and in square brackets are the optional part/total tests", + lambda x: x.startswith("Integration tests ("), + ), + CheckDescription( + StatusNames.MERGEABLE, + "Checks if all other necessary checks are successful", + lambda x: x == StatusNames.MERGEABLE, + ), + CheckDescription( + "Performance Comparison", + "Measure changes in query performance. The performance test report is " + 'described in detail here. 
' + "In square brackets are the optional part/total tests", + lambda x: x.startswith("Performance Comparison"), + ), + CheckDescription( + "Push to Dockerhub", + "The check for building and pushing the CI related docker images to docker hub", + lambda x: x.startswith("Push") and "to Dockerhub" in x, + ), + CheckDescription( + "Sqllogic", + "Run clickhouse on the " + 'sqllogic ' + "test set against sqlite and checks that all statements are passed", + lambda x: x.startswith("Sqllogic test"), + ), + CheckDescription( + "SQLancer", + "Fuzzing tests that detect logical bugs with " + 'SQLancer tool', + lambda x: x.startswith("SQLancer"), + ), + CheckDescription( + "Stateful tests", + "Runs stateful functional tests for ClickHouse binaries built in various " + "configurations -- release, debug, with sanitizers, etc", + lambda x: x.startswith("Stateful tests ("), + ), + CheckDescription( + "Stateless tests", + "Runs stateless functional tests for ClickHouse binaries built in various " + "configurations -- release, debug, with sanitizers, etc", + lambda x: x.startswith("Stateless tests ("), + ), + CheckDescription( + "Stress test", + "Runs stateless functional tests concurrently from several clients to detect " + "concurrency-related errors", + lambda x: x.startswith("Stress test ("), + ), + CheckDescription( + JobNames.STYLE_CHECK, + "Runs a set of checks to keep the code style clean. If some of tests failed, " + "see the related log from the report", + lambda x: x == JobNames.STYLE_CHECK, + ), + CheckDescription( + "Unit tests", + "Runs the unit tests for different release types", + lambda x: x.startswith("Unit tests ("), + ), + CheckDescription( + "Upgrade check", + "Runs stress tests on server version from last release and then tries to " + "upgrade it to the version from the PR. 
It checks if the new server can " + "successfully startup without any errors, crashes or sanitizer asserts", + lambda x: x.startswith("Upgrade check ("), + ), + CheckDescription( + "ClickBench", + "Runs [ClickBench](https://github.com/ClickHouse/ClickBench/) with instant-attach table", + lambda x: x.startswith("ClickBench"), + ), + CheckDescription( + "Fallback for unknown", + "There's no description for the check yet, please add it to " + "tests/ci/ci_config.py:CHECK_DESCRIPTIONS", + lambda x: True, + ), +] diff --git a/tests/ci/ci_settings.py b/tests/ci/ci_settings.py index f25344c7701..7b2dd12c310 100644 --- a/tests/ci/ci_settings.py +++ b/tests/ci/ci_settings.py @@ -3,7 +3,7 @@ from dataclasses import dataclass, asdict from typing import Optional, List, Dict, Any, Iterable from ci_utils import normalize_string -from ci_config import CILabels, CI_CONFIG, JobConfig, JobNames +from ci_config import CI from git_helper import Runner as GitRunner, GIT_PREFIX from pr_info import PRInfo @@ -29,6 +29,7 @@ class CiSettings: no_ci_cache: bool = False upload_all: bool = False no_merge_commit: bool = False + woolen_wolfdog: bool = False def as_dict(self) -> Dict[str, Any]: return asdict(self) @@ -80,7 +81,7 @@ class CiSettings: if not res.ci_jobs: res.ci_jobs = [] res.ci_jobs.append(match.removeprefix("job_")) - elif match.startswith("ci_set_") and match in CILabels: + elif match.startswith("ci_set_") and match in CI.Tags: if not res.ci_sets: res.ci_sets = [] res.ci_sets.append(match) @@ -97,17 +98,20 @@ class CiSettings: res.exclude_keywords += [ normalize_string(keyword) for keyword in keywords ] - elif match == CILabels.NO_CI_CACHE: + elif match == CI.Tags.NO_CI_CACHE: res.no_ci_cache = True print("NOTE: CI Cache will be disabled") - elif match == CILabels.UPLOAD_ALL_ARTIFACTS: + elif match == CI.Tags.UPLOAD_ALL_ARTIFACTS: res.upload_all = True print("NOTE: All binary artifacts will be uploaded") - elif match == CILabels.DO_NOT_TEST_LABEL: + elif match == CI.Tags.DO_NOT_TEST_LABEL: res.do_not_test = True - elif match == CILabels.NO_MERGE_COMMIT: + elif match == CI.Tags.NO_MERGE_COMMIT: res.no_merge_commit = True print("NOTE: Merge Commit will be disabled") + elif match == CI.Tags.WOOLEN_WOLFDOG_LABEL: + res.woolen_wolfdog = True + print("NOTE: Woolen Wolfdog mode enabled") elif match.startswith("batch_"): batches = [] try: @@ -131,18 +135,18 @@ class CiSettings: def _check_if_selected( self, job: str, - job_config: JobConfig, + job_config: CI.JobConfig, is_release: bool, is_pr: bool, is_mq: bool, labels: Iterable[str], ) -> bool: # type: ignore #too-many-return-statements if self.do_not_test: - label_config = CI_CONFIG.get_label_config(CILabels.DO_NOT_TEST_LABEL) - assert label_config, f"Unknown tag [{CILabels.DO_NOT_TEST_LABEL}]" + label_config = CI.get_tag_config(CI.Tags.DO_NOT_TEST_LABEL) + assert label_config, f"Unknown tag [{CI.Tags.DO_NOT_TEST_LABEL}]" if job in label_config.run_jobs: print( - f"Job [{job}] present in CI set [{CILabels.DO_NOT_TEST_LABEL}] - pass" + f"Job [{job}] present in CI set [{CI.Tags.DO_NOT_TEST_LABEL}] - pass" ) return True return False @@ -164,7 +168,7 @@ class CiSettings: to_deny = False if self.include_keywords: - if job == JobNames.STYLE_CHECK: + if job == CI.JobNames.STYLE_CHECK: # never exclude Style Check by include keywords return True for keyword in self.include_keywords: @@ -175,7 +179,7 @@ class CiSettings: if self.ci_sets: for tag in self.ci_sets: - label_config = CI_CONFIG.get_label_config(tag) + label_config = CI.get_tag_config(tag) assert 
label_config, f"Unknown tag [{tag}]" if job in label_config.run_jobs: print(f"Job [{job}] present in CI set [{tag}] - pass") @@ -197,12 +201,12 @@ class CiSettings: def apply( self, - job_configs: Dict[str, JobConfig], + job_configs: Dict[str, CI.JobConfig], is_release: bool, is_pr: bool, is_mq: bool, labels: Iterable[str], - ) -> Dict[str, JobConfig]: + ) -> Dict[str, CI.JobConfig]: """ Apply CI settings from pr body """ @@ -220,7 +224,7 @@ class CiSettings: add_parents = [] for job in list(res): - parent_jobs = CI_CONFIG.get_job_parents(job) + parent_jobs = CI.get_job_parents(job) for parent_job in parent_jobs: if parent_job not in res: add_parents.append(parent_job) diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index a0d6495452f..fdc9c002b66 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -17,9 +17,8 @@ from github.GithubObject import NotSet from github.IssueComment import IssueComment from github.Repository import Repository -from ci_config import CHECK_DESCRIPTIONS, CheckDescription, StatusNames, CIConfig -from env_helper import GITHUB_REPOSITORY, GITHUB_UPSTREAM_REPOSITORY, TEMP_PATH -from lambda_shared_package.lambda_shared.pr import Labels +from ci_config import CI +from env_helper import GITHUB_REPOSITORY, TEMP_PATH from pr_info import PRInfo from report import ( ERROR, @@ -29,7 +28,6 @@ from report import ( StatusType, TestResult, TestResults, - get_status, get_worst_status, ) from s3_helper import S3Helper @@ -103,7 +101,12 @@ def post_commit_status( if i == RETRY - 1: raise ex time.sleep(i) - if pr_info: + if pr_info and check_name not in ( + CI.StatusNames.MERGEABLE, + CI.StatusNames.CI, + CI.StatusNames.PR_CHECK, + CI.StatusNames.SYNC, + ): status_updated = False for i in range(RETRY): try: @@ -157,10 +160,21 @@ def set_status_comment(commit: Commit, pr_info: PRInfo) -> None: gh.__requester = commit._requester # type:ignore #pylint:disable=protected-access repo = get_repo(gh) statuses = sorted(get_commit_filtered_statuses(commit), key=lambda x: x.context) + statuses = [ + status + for status in statuses + if status.context + not in ( + CI.StatusNames.MERGEABLE, + CI.StatusNames.CI, + CI.StatusNames.PR_CHECK, + CI.StatusNames.SYNC, + ) + ] if not statuses: return - if not [status for status in statuses if status.context == StatusNames.CI]: + if not [status for status in statuses if status.context == CI.StatusNames.CI]: # This is the case, when some statuses already exist for the check, # but not the StatusNames.CI. We should create it as pending. 
# W/o pr_info to avoid recursion, and yes, one extra create_ci_report @@ -169,7 +183,7 @@ def set_status_comment(commit: Commit, pr_info: PRInfo) -> None: PENDING, create_ci_report(pr_info, statuses), "The report for running CI", - StatusNames.CI, + CI.StatusNames.CI, ) # We update the report in generate_status_comment function, so do it each @@ -212,20 +226,20 @@ def generate_status_comment(pr_info: PRInfo, statuses: CommitStatuses) -> str: f"\n" ) # group checks by the name to get the worst one per each - grouped_statuses = {} # type: Dict[CheckDescription, CommitStatuses] + grouped_statuses = {} # type: Dict[CI.CheckDescription, CommitStatuses] for status in statuses: cd = None - for c in CHECK_DESCRIPTIONS: + for c in CI.CHECK_DESCRIPTIONS: if c.match_func(status.context): cd = c break - if cd is None or cd == CHECK_DESCRIPTIONS[-1]: + if cd is None or cd == CI.CHECK_DESCRIPTIONS[-1]: # This is the case for either non-found description or a fallback - cd = CheckDescription( + cd = CI.CheckDescription( status.context, - CHECK_DESCRIPTIONS[-1].description, - CHECK_DESCRIPTIONS[-1].match_func, + CI.CHECK_DESCRIPTIONS[-1].description, + CI.CHECK_DESCRIPTIONS[-1].match_func, ) if cd in grouped_statuses: @@ -301,7 +315,7 @@ def create_ci_report(pr_info: PRInfo, statuses: CommitStatuses) -> str: ) ) return upload_results( - S3Helper(), pr_info.number, pr_info.sha, test_results, [], StatusNames.CI + S3Helper(), pr_info.number, pr_info.sha, test_results, [], CI.StatusNames.CI ) @@ -435,43 +449,22 @@ def set_mergeable_check( state, report_url, format_description(description), - StatusNames.MERGEABLE, + CI.StatusNames.MERGEABLE, ) -def update_mergeable_check(commit: Commit, pr_info: PRInfo, check_name: str) -> None: - "check if the check_name in REQUIRED_CHECKS and then trigger update" - not_run = ( - pr_info.labels.intersection({Labels.SKIP_MERGEABLE_CHECK, Labels.RELEASE}) - or not CIConfig.is_required(check_name) - or pr_info.release_pr - or pr_info.number == 0 - ) - - if not_run: - # Let's avoid unnecessary work - return - - logging.info("Update Mergeable Check by %s", check_name) - - statuses = get_commit_filtered_statuses(commit) - trigger_mergeable_check(commit, statuses) - - def trigger_mergeable_check( commit: Commit, statuses: CommitStatuses, - set_if_green: bool = False, + set_from_sync: bool = False, workflow_failed: bool = False, ) -> StatusType: """calculate and update StatusNames.MERGEABLE""" - required_checks = [ - status for status in statuses if CIConfig.is_required(status.context) - ] + required_checks = [status for status in statuses if CI.is_required(status.context)] mergeable_status = None for status in statuses: - if status.context == StatusNames.MERGEABLE: + if status.context == CI.StatusNames.MERGEABLE: mergeable_status = status break @@ -503,63 +496,43 @@ def trigger_mergeable_check( description = format_description(description) - if not set_if_green and state == SUCCESS: - # do not set green Mergeable Check status - pass - else: - if mergeable_status is None or mergeable_status.description != description: + if set_from_sync: + # update Mergeable Check from sync WF only if its status already present or its new status is not SUCCESS + # to avoid false-positives + if mergeable_status or state != SUCCESS: set_mergeable_check(commit, description, state) + elif mergeable_status is None or mergeable_status.description != description: + set_mergeable_check(commit, description, state) return state def update_upstream_sync_status( - upstream_pr_number: int, - sync_pr_number: int, 
- gh: Github, + pr_info: PRInfo, state: StatusType, - can_set_green_mergeable_status: bool = False, ) -> None: - upstream_repo = gh.get_repo(GITHUB_UPSTREAM_REPOSITORY) - upstream_pr = upstream_repo.get_pull(upstream_pr_number) - sync_repo = gh.get_repo(GITHUB_REPOSITORY) - sync_pr = sync_repo.get_pull(sync_pr_number) - # Find the commit that is in both repos, upstream and cloud - sync_commits = sync_pr.get_commits().reversed - upstream_commits = upstream_pr.get_commits().reversed - # Github objects are compared by _url attribute. We can't compare them directly and - # should compare commits by SHA1 - upstream_shas = [c.sha for c in upstream_commits] - logging.info("Commits in upstream PR:\n %s", ", ".join(upstream_shas)) - sync_shas = [c.sha for c in sync_commits] - logging.info("Commits in sync PR:\n %s", ", ".join(reversed(sync_shas))) + last_synced_upstream_commit = pr_info.get_latest_sync_commit() - # find latest synced commit - last_synced_upstream_commit = None - for commit in upstream_commits: - if commit.sha in sync_shas: - last_synced_upstream_commit = commit - break - - assert last_synced_upstream_commit - - sync_status = get_status(state) logging.info( - "Using commit %s to post the %s status `%s`: [%s]", + "Using commit [%s] to post the [%s] status [%s]", last_synced_upstream_commit.sha, - sync_status, - StatusNames.SYNC, - "", + state, + CI.StatusNames.SYNC, ) + if state == SUCCESS: + description = CI.SyncState.COMPLETED + else: + description = CI.SyncState.TESTS_FAILED + post_commit_status( last_synced_upstream_commit, - sync_status, + state, "", - "", - StatusNames.SYNC, + description, + CI.StatusNames.SYNC, ) trigger_mergeable_check( last_synced_upstream_commit, get_commit_filtered_statuses(last_synced_upstream_commit), - set_if_green=can_set_green_mergeable_status, + set_from_sync=True, ) diff --git a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py index e7fee827320..bb0c717160e 100644 --- a/tests/ci/compatibility_check.py +++ b/tests/ci/compatibility_check.py @@ -196,7 +196,7 @@ def main(): # See https://sourceware.org/glibc/wiki/Glibc%20Timeline max_glibc_version = "" - if "amd64" in check_name: + if "amd64" in check_name or "release" in check_name: max_glibc_version = "2.4" elif "aarch64" in check_name: max_glibc_version = "2.18" # because of build with newer sysroot? 
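For readability, here is the decision rule behind the new `set_from_sync` flag in `trigger_mergeable_check` (tests/ci/commit_status_helper.py above), restated outside of diff form. This is a minimal, hypothetical sketch with illustrative names and signature, not the actual helper:

```python
# Sketch of the rule guarded by `set_from_sync` in trigger_mergeable_check().
# `existing_status` is the current "Mergeable Check" commit status (or None);
# `new_state`/`new_description` are the freshly computed values.

SUCCESS = "success"

def should_update_mergeable_check(existing_status, new_state, new_description,
                                  set_from_sync):
    if set_from_sync:
        # Called from the sync workflow: only touch the check if it already
        # exists or the new state is not SUCCESS, so the sync workflow can
        # never be the first to mark the check green (avoids false positives).
        return existing_status is not None or new_state != SUCCESS
    # Regular callers update whenever the status is missing or its text changed.
    return (existing_status is None
            or existing_status.description != new_description)
```

In effect, the sync workflow may refresh an existing status or report a failure, but it never creates a fresh green Mergeable Check on its own.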
diff --git a/tests/ci/digest_helper.py b/tests/ci/digest_helper.py index 8d6ec127f6e..4dcfb03c04f 100644 --- a/tests/ci/digest_helper.py +++ b/tests/ci/digest_helper.py @@ -9,10 +9,10 @@ from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Union from sys import modules from docker_images_helper import get_images_info -from ci_config import DigestConfig from git_helper import Runner from env_helper import ROOT_DIR from ci_utils import cd +from ci_config import CI DOCKER_DIGEST_LEN = 12 JOB_DIGEST_LEN = 10 @@ -139,20 +139,21 @@ class DockerDigester: class JobDigester: - def __init__(self): + def __init__(self, dry_run: bool = False): self.dd = DockerDigester() self.cache: Dict[str, str] = {} + self.dry_run = dry_run @staticmethod - def _get_config_hash(digest_config: DigestConfig) -> str: + def _get_config_hash(digest_config: CI.DigestConfig) -> str: data_dict = asdict(digest_config) hash_obj = md5() hash_obj.update(str(data_dict).encode()) hash_string = hash_obj.hexdigest() return hash_string - def get_job_digest(self, digest_config: DigestConfig) -> str: - if not digest_config.include_paths: + def get_job_digest(self, digest_config: CI.DigestConfig) -> str: + if not digest_config.include_paths or self.dry_run: # job is not for digest return "f" * JOB_DIGEST_LEN diff --git a/tests/ci/download_binary.py b/tests/ci/download_binary.py index 79db1e57d62..b0b5659ca83 100755 --- a/tests/ci/download_binary.py +++ b/tests/ci/download_binary.py @@ -8,7 +8,7 @@ import logging from pathlib import Path from build_download_helper import download_build_with_progress -from ci_config import CI_CONFIG +from ci_config import CI from env_helper import RUNNER_TEMP, S3_ARTIFACT_DOWNLOAD_TEMPLATE from git_helper import Git, commit from version_helper import get_version_from_repo, version_arg @@ -59,7 +59,8 @@ def main(): temp_path.mkdir(parents=True, exist_ok=True) for build in args.build_names: # check if it's in CI_CONFIG - config = CI_CONFIG.build_config[build] + config = CI.JOB_CONFIGS[build].build_config + assert config if args.rename and config.static_binary_name: path = temp_path / f"clickhouse-{config.static_binary_name}" else: diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py index 36732bd7c9f..5217e4035da 100644 --- a/tests/ci/env_helper.py +++ b/tests/ci/env_helper.py @@ -9,8 +9,9 @@ from build_download_helper import APIException, get_gh_api module_dir = p.abspath(p.dirname(__file__)) git_root = p.abspath(p.join(module_dir, "..", "..")) + ROOT_DIR = git_root -CI = bool(os.getenv("CI")) +IS_CI = bool(os.getenv("CI")) TEMP_PATH = os.getenv("TEMP_PATH", p.abspath(p.join(module_dir, "./tmp"))) REPORT_PATH = f"{TEMP_PATH}/reports" # FIXME: latest should not be used in CI, set temporary for transition to "docker with digest as a tag" diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index 904b565ad86..385caccc8cd 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -4,20 +4,15 @@ import logging from github import Github -from ci_config import StatusNames +from ci_config import CI from commit_status_helper import ( get_commit, get_commit_filtered_statuses, post_commit_status, - set_mergeable_check, - trigger_mergeable_check, - update_upstream_sync_status, ) -from env_helper import GITHUB_REPOSITORY, GITHUB_UPSTREAM_REPOSITORY from get_robot_token import get_best_robot_token from pr_info import PRInfo from report import FAILURE, PENDING, SUCCESS, StatusType -from synchronizer_utils import SYNC_BRANCH_PREFIX def parse_args() -> argparse.Namespace: @@ 
-45,33 +40,9 @@ def main(): gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) - if pr_info.is_merge_queue: - # in MQ Mergeable check status must never be green if any failures in the workflow - if has_workflow_failures: - set_mergeable_check(commit, "workflow failed", FAILURE) - else: - # This must be the only place where green MCheck is set in the MQ (in the end of CI) to avoid early merge - set_mergeable_check(commit, "workflow passed", SUCCESS) - return - statuses = get_commit_filtered_statuses(commit) - state = trigger_mergeable_check(commit, statuses, set_if_green=True) - # Process upstream StatusNames.SYNC - if ( - pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/") - and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY - ): - upstream_pr_number = int(pr_info.head_ref.split("/pr/", maxsplit=1)[1]) - update_upstream_sync_status( - upstream_pr_number, - pr_info.number, - gh, - state, - can_set_green_mergeable_status=True, - ) - - ci_running_statuses = [s for s in statuses if s.context == StatusNames.CI] + ci_running_statuses = [s for s in statuses if s.context == CI.StatusNames.CI] if not ci_running_statuses: return # Take the latest status @@ -81,7 +52,11 @@ def main(): has_pending = False error_cnt = 0 for status in statuses: - if status.context in (StatusNames.MERGEABLE, StatusNames.CI, StatusNames.SYNC): + if status.context in ( + CI.StatusNames.MERGEABLE, + CI.StatusNames.CI, + CI.StatusNames.SYNC, + ): # do not account these statuses continue if status.state == PENDING: @@ -108,7 +83,7 @@ def main(): ci_state, ci_status.target_url, description, - StatusNames.CI, + CI.StatusNames.CI, pr_info, dump_to_file=True, ) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 9678efd8631..ee459ce35a0 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -122,6 +122,10 @@ def _get_statless_tests_to_run(pr_info: PRInfo) -> List[str]: for fpath in pr_info.changed_files: if re.match(r"tests/queries/0_stateless/[0-9]{5}", fpath): + path_ = Path(REPO_COPY + "/" + fpath) + if not path_.exists(): + logging.info("File '%s' is removed - skip", fpath) + continue logging.info("File '%s' is changed and seems like a test", fpath) fname = fpath.split("/")[3] fname_without_ext = os.path.splitext(fname)[0] diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 4abaeac30b7..87f721cfde7 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -18,7 +18,7 @@ from collections import defaultdict from itertools import chain from typing import Any, Dict -from env_helper import CI +from env_helper import IS_CI from integration_test_images import IMAGES MAX_RETRY = 1 @@ -1004,7 +1004,7 @@ def run(): logging.info("Running tests") - if CI: + if IS_CI: # Avoid overlaps with previous runs logging.info("Clearing dmesg before run") subprocess.check_call("sudo -E dmesg --clear", shell=True) @@ -1012,7 +1012,7 @@ def run(): state, description, test_results, _ = runner.run_impl(repo_path, build_path) logging.info("Tests finished") - if CI: + if IS_CI: # Dump dmesg (to capture possible OOMs) logging.info("Dumping dmesg") subprocess.check_call("sudo -E dmesg -T", shell=True) diff --git a/tests/ci/jepsen_check.py b/tests/ci/jepsen_check.py index 1e61fd9fab7..f91a3f080c0 100644 --- a/tests/ci/jepsen_check.py +++ b/tests/ci/jepsen_check.py @@ -13,7 +13,6 @@ import requests from build_download_helper import ( download_build_with_progress, - 
get_build_name_for_check, read_build_urls, ) from compress_files import compress_fast @@ -25,6 +24,7 @@ from report import FAILURE, SUCCESS, JobReport, TestResult, TestResults from ssh import SSHKey from stopwatch import Stopwatch from tee_popen import TeePopen +from ci_config import CI JEPSEN_GROUP_NAME = "jepsen_group" @@ -224,7 +224,7 @@ def main(): head = requests.head(build_url, timeout=60) assert head.status_code == 200, f"Clickhouse binary not found: {build_url}" else: - build_name = get_build_name_for_check(check_name) + build_name = CI.get_required_build_name(check_name) urls = read_build_urls(build_name, REPORT_PATH) build_url = None for url in urls: diff --git a/tests/ci/lambda_shared_package/lambda_shared/token.py b/tests/ci/lambda_shared_package/lambda_shared/token.py index f9860f6ad2a..9749122bd39 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/token.py +++ b/tests/ci/lambda_shared_package/lambda_shared/token.py @@ -63,7 +63,10 @@ def get_access_token_by_key_app(private_key: str, app_id: int) -> str: "iss": app_id, } - encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") + # FIXME: apparently should be switched to this so that mypy is happy + # jwt_instance = JWT() + # encoded_jwt = jwt_instance.encode(payload, private_key, algorithm="RS256") + encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") # type: ignore installation_id = get_installation_id(encoded_jwt) return get_access_token_by_jwt(encoded_jwt, installation_id) diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index e1c7bf94ff5..37c08fc4efe 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -4,6 +4,7 @@ import argparse import logging +import sys from datetime import datetime from os import getenv from pprint import pformat @@ -17,11 +18,14 @@ from commit_status_helper import ( get_commit_filtered_statuses, get_commit, trigger_mergeable_check, + update_upstream_sync_status, ) from get_robot_token import get_best_robot_token from github_helper import GitHub, NamedUser, PullRequest, Repository from pr_info import PRInfo -from report import SUCCESS +from report import SUCCESS, FAILURE +from env_helper import GITHUB_UPSTREAM_REPOSITORY, GITHUB_REPOSITORY +from synchronizer_utils import SYNC_BRANCH_PREFIX # The team name for accepted approvals TEAM_NAME = getenv("GITHUB_TEAM_NAME", "core") @@ -243,17 +247,29 @@ def main(): repo = gh.get_repo(args.repo) if args.set_ci_status: - assert args.wf_status in ("failure", "success") + assert args.wf_status in (FAILURE, SUCCESS) # set mergeable check status and exit commit = get_commit(gh, args.pr_info.sha) statuses = get_commit_filtered_statuses(commit) - trigger_mergeable_check( + state = trigger_mergeable_check( commit, statuses, - set_if_green=True, workflow_failed=(args.wf_status != "success"), ) - return + + # Process upstream StatusNames.SYNC + pr_info = PRInfo() + if ( + pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/") + and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY + ): + print("Updating upstream statuses") + update_upstream_sync_status(pr_info, state) + + if args.wf_status != "success": + # exit with 1 to rerun on workflow failed job restart + sys.exit(1) + sys.exit(0) # An ugly and not nice fix to patch the wrong organization URL, # see https://github.com/PyGithub/PyGithub/issues/2395#issuecomment-1378629710 diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index 0c779b515bd..b3488ac0af2 100644 --- a/tests/ci/performance_comparison_check.py +++ 
b/tests/ci/performance_comparison_check.py @@ -12,7 +12,7 @@ from pathlib import Path from github import Github from build_download_helper import download_builds_filter -from ci_config import CI_CONFIG +from ci_config import CI from clickhouse_helper import get_instance_id, get_instance_type from commit_status_helper import get_commit from docker_images_helper import get_docker_image, pull_image @@ -83,7 +83,7 @@ def main(): assert ( check_name ), "Check name must be provided as an input arg or in CHECK_NAME env" - required_build = CI_CONFIG.test_configs[check_name].required_build + required_build = CI.JOB_CONFIGS[check_name].get_required_build() with open(GITHUB_EVENT_PATH, "r", encoding="utf-8") as event_file: event = json.load(event_file) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index dda5b30f1e3..a411fc4e8f6 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -13,8 +13,11 @@ from env_helper import ( GITHUB_REPOSITORY, GITHUB_RUN_URL, GITHUB_SERVER_URL, + GITHUB_UPSTREAM_REPOSITORY, ) from lambda_shared_package.lambda_shared.pr import Labels +from get_robot_token import get_best_robot_token +from github_helper import GitHub NeedsDataType = Dict[str, Dict[str, Union[str, Dict[str, str]]]] @@ -316,7 +319,9 @@ class PRInfo: @property def is_master(self) -> bool: - return self.number == 0 and self.head_ref == "master" + return ( + self.number == 0 and self.head_ref == "master" and not self.is_merge_queue + ) @property def is_release(self) -> bool: @@ -324,7 +329,10 @@ class PRInfo: @property def is_pr(self): - return self.event_type == EventType.PULL_REQUEST + if self.event_type == EventType.PULL_REQUEST: + assert self.number + return True + return False @property def is_scheduled(self) -> bool: @@ -353,9 +361,6 @@ class PRInfo: if self.changed_files_requested: return - if not getattr(self, "diff_urls", False): - raise TypeError("The event does not have diff URLs") - for diff_url in self.diff_urls: response = get_gh_api( diff_url, @@ -430,6 +435,34 @@ class PRInfo: return True return False + def get_latest_sync_commit(self): + gh = GitHub(get_best_robot_token(), per_page=100) + assert self.head_ref.startswith("sync-upstream/pr/") + assert self.repo_full_name != GITHUB_UPSTREAM_REPOSITORY + upstream_repo = gh.get_repo(GITHUB_UPSTREAM_REPOSITORY) + upstream_pr_number = int(self.head_ref.split("/pr/", maxsplit=1)[1]) + upstream_pr = upstream_repo.get_pull(upstream_pr_number) + sync_repo = gh.get_repo(GITHUB_REPOSITORY) + sync_pr = sync_repo.get_pull(self.number) + # Find the commit that is in both repos, upstream and cloud + sync_commits = sync_pr.get_commits().reversed + upstream_commits = upstream_pr.get_commits().reversed + # Github objects are compared by _url attribute. 
We can't compare them directly and + # should compare commits by SHA1 + upstream_shas = [c.sha for c in upstream_commits] + logging.info("Commits in upstream PR:\n %s", ", ".join(upstream_shas)) + sync_shas = [c.sha for c in sync_commits] + logging.info("Commits in sync PR:\n %s", ", ".join(reversed(sync_shas))) + + # find latest synced commit + last_synced_upstream_commit = None + for commit in upstream_commits: + if commit.sha in sync_shas: + last_synced_upstream_commit = commit + break + assert last_synced_upstream_commit + return last_synced_upstream_commit + class FakePRInfo: def __init__(self): diff --git a/tests/ci/report.py b/tests/ci/report.py index ee58efdba52..bdaa2e15130 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -21,7 +21,7 @@ from typing import ( ) from build_download_helper import get_gh_api -from ci_config import CI_CONFIG, BuildConfig +from ci_config import CI from ci_utils import normalize_string from env_helper import REPORT_PATH, TEMP_PATH @@ -412,6 +412,7 @@ class BuildResult: ref_report = None master_report = None any_report = None + Path(REPORT_PATH).mkdir(parents=True, exist_ok=True) for file in Path(REPORT_PATH).iterdir(): if f"{build_name}.json" in file.name: any_report = file @@ -448,8 +449,10 @@ class BuildResult: return json.dumps(asdict(self), indent=2) @property - def build_config(self) -> Optional[BuildConfig]: - return CI_CONFIG.build_config.get(self.build_name, None) + def build_config(self) -> Optional[CI.BuildConfig]: + if self.build_name not in CI.JOB_CONFIGS: + return None + return CI.JOB_CONFIGS[self.build_name].build_config @property def comment(self) -> str: diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 131cbeef786..00942352dde 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -5,7 +5,6 @@ from typing import Tuple from github import Github -from ci_config import StatusNames from commit_status_helper import ( create_ci_report, format_description, @@ -24,6 +23,7 @@ from lambda_shared_package.lambda_shared.pr import ( ) from pr_info import PRInfo from report import FAILURE, PENDING, SUCCESS, StatusType +from ci_config import CI TRUSTED_ORG_IDS = { 54801242, # clickhouse @@ -208,7 +208,7 @@ def main(): PENDING, ci_report_url, description, - StatusNames.CI, + CI.StatusNames.CI, pr_info, ) diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index bff53f00ad3..86656e6e7c0 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -11,7 +11,7 @@ import boto3 # type: ignore import botocore # type: ignore from compress_files import compress_file_fast from env_helper import ( - CI, + IS_CI, RUNNER_TEMP, S3_BUILDS_BUCKET, S3_DOWNLOAD, @@ -104,20 +104,20 @@ class S3Helper: self.client.upload_file(file_path, bucket_name, s3_path, ExtraArgs=metadata) url = self.s3_url(bucket_name, s3_path) - logging.info("Upload %s to %s. 
Meta: %s", file_path, url, metadata) + logging.info("Upload %s to %s Meta: %s", file_path, url, metadata) return url def delete_file_from_s3(self, bucket_name: str, s3_path: str) -> None: self.client.delete_object(Bucket=bucket_name, Key=s3_path) def upload_test_report_to_s3(self, file_path: Path, s3_path: str) -> str: - if CI: + if IS_CI: return self._upload_file_to_s3(S3_TEST_REPORTS_BUCKET, file_path, s3_path) return S3Helper.copy_file_to_local(S3_TEST_REPORTS_BUCKET, file_path, s3_path) def upload_build_file_to_s3(self, file_path: Path, s3_path: str) -> str: - if CI: + if IS_CI: return self._upload_file_to_s3(S3_BUILDS_BUCKET, file_path, s3_path) return S3Helper.copy_file_to_local(S3_BUILDS_BUCKET, file_path, s3_path) @@ -255,7 +255,7 @@ class S3Helper: if full_fs_path.is_symlink(): if upload_symlinks: - if CI: + if IS_CI: return self._upload_file_to_s3( bucket_name, full_fs_path, @@ -266,7 +266,7 @@ class S3Helper: ) return [] - if CI: + if IS_CI: return self._upload_file_to_s3( bucket_name, full_fs_path, full_s3_path + "/" + file_path.name ) @@ -331,7 +331,7 @@ class S3Helper: return result def url_if_exists(self, key: str, bucket: str = S3_BUILDS_BUCKET) -> str: - if not CI: + if not IS_CI: local_path = self.local_path(bucket, key) if local_path.exists(): return local_path.as_uri() @@ -345,7 +345,7 @@ class S3Helper: @staticmethod def get_url(bucket: str, key: str) -> str: - if CI: + if IS_CI: return S3Helper.s3_url(bucket, key) return S3Helper.local_path(bucket, key).as_uri() diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index 9d33c480598..a68db8b9791 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -6,12 +6,13 @@ import subprocess import sys from pathlib import Path -from build_download_helper import get_build_name_for_check, read_build_urls +from build_download_helper import read_build_urls from docker_images_helper import DockerImage, get_docker_image, pull_image from env_helper import REPORT_PATH, TEMP_PATH from report import FAILURE, SUCCESS, JobReport, TestResult, TestResults from stopwatch import Stopwatch from tee_popen import TeePopen +from ci_config import CI IMAGE_NAME = "clickhouse/sqlancer-test" @@ -43,7 +44,7 @@ def main(): docker_image = pull_image(get_docker_image(IMAGE_NAME)) - build_name = get_build_name_for_check(check_name) + build_name = CI.get_required_build_name(check_name) urls = read_build_urls(build_name, reports_path) if not urls: raise ValueError("No build URLs found") diff --git a/tests/ci/sqltest.py b/tests/ci/sqltest.py index c8c2adbbd56..8e6ca6ff87f 100644 --- a/tests/ci/sqltest.py +++ b/tests/ci/sqltest.py @@ -6,12 +6,13 @@ import subprocess import sys from pathlib import Path -from build_download_helper import get_build_name_for_check, read_build_urls +from build_download_helper import read_build_urls from docker_images_helper import get_docker_image, pull_image from env_helper import REPORT_PATH, TEMP_PATH from pr_info import PRInfo from report import SUCCESS, JobReport, TestResult from stopwatch import Stopwatch +from ci_config import CI IMAGE_NAME = "clickhouse/sqltest" @@ -49,7 +50,7 @@ def main(): docker_image = pull_image(get_docker_image(IMAGE_NAME)) - build_name = get_build_name_for_check(check_name) + build_name = CI.get_required_build_name(check_name) print(build_name) urls = read_build_urls(build_name, reports_path) if not urls: diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9906d87a8c0..9deae06d9f4 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py 
@@ -13,7 +13,7 @@ from typing import List, Tuple, Union import magic from docker_images_helper import get_docker_image, pull_image -from env_helper import CI, REPO_COPY, TEMP_PATH +from env_helper import IS_CI, REPO_COPY, TEMP_PATH, GITHUB_EVENT_PATH from git_helper import GIT_PREFIX, git_runner from pr_info import PRInfo from report import ERROR, FAILURE, SUCCESS, JobReport, TestResults, read_test_results @@ -152,7 +152,7 @@ def main(): run_cpp_check = True run_shell_check = True run_python_check = True - if CI and pr_info.number > 0: + if IS_CI and pr_info.number > 0: pr_info.fetch_changed_files() run_cpp_check = any( not (is_python(file) or is_shell(file)) for file in pr_info.changed_files @@ -216,7 +216,8 @@ def main(): status=state, start_time=stopwatch.start_time_str, duration=stopwatch.duration_seconds, - additional_files=additional_files, + # add GITHUB_EVENT_PATH json file to have it in style check report. sometimes it's needed for debugging. + additional_files=additional_files + [Path(GITHUB_EVENT_PATH)], ).dump() if state in [ERROR, FAILURE]: diff --git a/tests/ci/sync_pr.py b/tests/ci/sync_pr.py index f1073603e8d..8251ccbaf38 100644 --- a/tests/ci/sync_pr.py +++ b/tests/ci/sync_pr.py @@ -5,12 +5,12 @@ import argparse import sys -from ci_config import StatusNames from commit_status_helper import get_commit, post_commit_status from get_robot_token import get_best_robot_token from github_helper import GitHub from pr_info import PRInfo from report import SUCCESS +from ci_config import CI def parse_args() -> argparse.Namespace: @@ -75,7 +75,7 @@ def set_sync_status(gh, pr_info, sync_pr): if sync_pr.mergeable_state == "clean": print(f"Sync PR [{sync_pr.number}] is clean") post_commit_status( - get_commit(gh, pr_info.sha), SUCCESS, "", "", StatusNames.SYNC + get_commit(gh, pr_info.sha), SUCCESS, "", "", CI.StatusNames.SYNC ) else: print( diff --git a/tests/ci/test_ci_cache.py b/tests/ci/test_ci_cache.py index b1be0709803..81d649b246b 100644 --- a/tests/ci/test_ci_cache.py +++ b/tests/ci/test_ci_cache.py @@ -5,12 +5,12 @@ from pathlib import Path import shutil from typing import Dict, Set import unittest -from ci_config import Build, JobNames from s3_helper import S3Helper from ci_cache import CiCache from digest_helper import JOB_DIGEST_LEN from commit_status_helper import CommitStatusData from env_helper import S3_BUILDS_BUCKET, TEMP_PATH +from ci_config import CI def _create_mock_digest_1(string): @@ -21,8 +21,8 @@ def _create_mock_digest_2(string): return md5((string + "+nonce").encode("utf-8")).hexdigest()[:JOB_DIGEST_LEN] -DIGESTS = {job: _create_mock_digest_1(job) for job in JobNames} -DIGESTS2 = {job: _create_mock_digest_2(job) for job in JobNames} +DIGESTS = {job: _create_mock_digest_1(job) for job in CI.JobNames} +DIGESTS2 = {job: _create_mock_digest_2(job) for job in CI.JobNames} # pylint:disable=protected-access @@ -84,8 +84,10 @@ class TestCiCache(unittest.TestCase): NUM_BATCHES = 10 DOCS_JOBS_NUM = 1 - assert len(set(job for job in JobNames)) == len(list(job for job in JobNames)) - NONDOCS_JOBS_NUM = len(set(job for job in JobNames)) - DOCS_JOBS_NUM + assert len(set(job for job in CI.JobNames)) == len( + list(job for job in CI.JobNames) + ) + NONDOCS_JOBS_NUM = len(set(job for job in CI.JobNames)) - DOCS_JOBS_NUM PR_NUM = 123456 status = CommitStatusData( @@ -97,13 +99,13 @@ class TestCiCache(unittest.TestCase): ) ### add some pending statuses for two batches, non-release branch - for job in JobNames: + for job in CI.JobNames: ci_cache.push_pending(job, [0, 1, 2], 
NUM_BATCHES, release_branch=False) ci_cache_2.push_pending(job, [0, 1, 2], NUM_BATCHES, release_branch=False) ### add success status for 0 batch, non-release branch batch = 0 - for job in JobNames: + for job in CI.JobNames: ci_cache.push_successful( job, batch, NUM_BATCHES, status, release_branch=False ) @@ -113,21 +115,17 @@ class TestCiCache(unittest.TestCase): ### add failed status for 2 batch, non-release branch batch = 2 - for job in JobNames: + for job in CI.JobNames: ci_cache.push_failed(job, batch, NUM_BATCHES, status, release_branch=False) ci_cache_2.push_failed( job, batch, NUM_BATCHES, status, release_branch=False ) ### check all expected directories were created on s3 mock - expected_build_path_1 = f"{CiCache.JobType.SRCS.value}-{_create_mock_digest_1(Build.PACKAGE_RELEASE)}" - expected_docs_path_1 = ( - f"{CiCache.JobType.DOCS.value}-{_create_mock_digest_1(JobNames.DOCS_CHECK)}" - ) - expected_build_path_2 = f"{CiCache.JobType.SRCS.value}-{_create_mock_digest_2(Build.PACKAGE_RELEASE)}" - expected_docs_path_2 = ( - f"{CiCache.JobType.DOCS.value}-{_create_mock_digest_2(JobNames.DOCS_CHECK)}" - ) + expected_build_path_1 = f"{CiCache.JobType.SRCS.value}-{_create_mock_digest_1(CI.BuildNames.PACKAGE_RELEASE)}" + expected_docs_path_1 = f"{CiCache.JobType.DOCS.value}-{_create_mock_digest_1(CI.JobNames.DOCS_CHECK)}" + expected_build_path_2 = f"{CiCache.JobType.SRCS.value}-{_create_mock_digest_2(CI.BuildNames.PACKAGE_RELEASE)}" + expected_docs_path_2 = f"{CiCache.JobType.DOCS.value}-{_create_mock_digest_2(CI.JobNames.DOCS_CHECK)}" self.assertCountEqual( list(s3_mock.files_on_s3_paths.keys()), [ @@ -174,7 +172,7 @@ class TestCiCache(unittest.TestCase): ) ### check statuses for all jobs in cache - for job in JobNames: + for job in CI.JobNames: self.assertEqual( ci_cache.is_successful(job, 0, NUM_BATCHES, release_branch=False), True ) @@ -212,7 +210,7 @@ class TestCiCache(unittest.TestCase): assert status2 is None ### add some more pending statuses for two batches and for a release branch - for job in JobNames: + for job in CI.JobNames: ci_cache.push_pending( job, batches=[0, 1], num_batches=NUM_BATCHES, release_branch=True ) @@ -226,7 +224,7 @@ class TestCiCache(unittest.TestCase): sha="deadbeaf2", pr_num=PR_NUM, ) - for job in JobNames: + for job in CI.JobNames: ci_cache.push_successful(job, 0, NUM_BATCHES, status, release_branch=True) ### check number of cache files is as expected @@ -249,7 +247,7 @@ class TestCiCache(unittest.TestCase): ) ### check statuses - for job in JobNames: + for job in CI.JobNames: self.assertEqual(ci_cache.is_successful(job, 0, NUM_BATCHES, False), True) self.assertEqual(ci_cache.is_successful(job, 0, NUM_BATCHES, True), True) self.assertEqual(ci_cache.is_successful(job, 1, NUM_BATCHES, False), False) @@ -273,7 +271,7 @@ class TestCiCache(unittest.TestCase): ### create new cache object and verify the same checks ci_cache = CiCache(s3_mock, DIGESTS) - for job in JobNames: + for job in CI.JobNames: self.assertEqual(ci_cache.is_successful(job, 0, NUM_BATCHES, False), True) self.assertEqual(ci_cache.is_successful(job, 0, NUM_BATCHES, True), True) self.assertEqual(ci_cache.is_successful(job, 1, NUM_BATCHES, False), False) diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index badbc4c5dcf..47247b91858 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -1,30 +1,491 @@ #!/usr/bin/env python3 import unittest -from ci_config import CIStages, JobNames, CI_CONFIG, Runners +from ci_config import CI +import ci as CIPY +from 
ci_settings import CiSettings +from pr_info import PRInfo, EventType +from s3_helper import S3Helper +from ci_cache import CiCache +from ci_utils import normalize_string + + +_TEST_EVENT_JSON = {"dummy": "dummy"} + +# pylint:disable=protected-access,union-attr class TestCIConfig(unittest.TestCase): def test_runner_config(self): """check runner is provided w/o exception""" - for job in JobNames: - runner = CI_CONFIG.get_runner_type(job) - self.assertIn(runner, Runners) + for job in CI.JobNames: + self.assertIn(CI.JOB_CONFIGS[job].runner_type, CI.Runners) + if ( + job + in ( + CI.JobNames.STYLE_CHECK, + CI.JobNames.BUILD_CHECK, + ) + or "jepsen" in job.lower() + ): + self.assertTrue( + "style" in CI.JOB_CONFIGS[job].runner_type, + f"Job [{job}] must have style-checker(-aarch64) runner", + ) + elif "binary_" in job.lower() or "package_" in job.lower(): + self.assertTrue( + CI.JOB_CONFIGS[job].runner_type == CI.Runners.BUILDER, + f"Job [{job}] must have [{CI.Runners.BUILDER}] runner", + ) + elif "aarch64" in job.lower(): + self.assertTrue( + "aarch" in CI.JOB_CONFIGS[job].runner_type, + f"Job [{job}] does not match runner [{CI.JOB_CONFIGS[job].runner_type}]", + ) + else: + self.assertTrue( + "aarch" not in CI.JOB_CONFIGS[job].runner_type, + f"Job [{job}] does not match runner [{CI.JOB_CONFIGS[job].runner_type}]", + ) + + def test_common_configs_applied_properly(self): + for job in CI.JobNames: + if CI.JOB_CONFIGS[job].job_name_keyword: + self.assertTrue( + CI.JOB_CONFIGS[job].job_name_keyword.lower() + in normalize_string(job), + f"Job [{job}] apparently uses wrong common config with job keyword [{CI.JOB_CONFIGS[job].job_name_keyword}]", + ) + + def test_required_checks(self): + for job in CI.REQUIRED_CHECKS: + if job in (CI.StatusNames.PR_CHECK, CI.StatusNames.SYNC): + continue + self.assertTrue(job in CI.JOB_CONFIGS, f"Job [{job}] not in job config") + + def test_builds_configs(self): + """build name in the build config must match the job name""" + for job in CI.JobNames: + self.assertTrue(job in CI.JOB_CONFIGS) + self.assertTrue(CI.JOB_CONFIGS[job].runner_type in CI.Runners) + if job in CI.BuildNames: + self.assertTrue(CI.JOB_CONFIGS[job].build_config.name == job) + self.assertTrue(CI.JOB_CONFIGS[job].required_builds is None) + else: + self.assertTrue(CI.JOB_CONFIGS[job].build_config is None) + if "asan" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + == CI.BuildNames.PACKAGE_ASAN, + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "msan" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + == CI.BuildNames.PACKAGE_MSAN, + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "tsan" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + == CI.BuildNames.PACKAGE_TSAN, + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "ubsan" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + == CI.BuildNames.PACKAGE_UBSAN, + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "debug" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + == CI.BuildNames.PACKAGE_DEBUG, + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "release" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] 
+ in ( + CI.BuildNames.PACKAGE_RELEASE, + CI.BuildNames.BINARY_RELEASE, + ), + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "coverage" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + == CI.BuildNames.PACKAGE_RELEASE_COVERAGE, + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "aarch" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + == CI.BuildNames.PACKAGE_AARCH64, + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "amd64" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + == CI.BuildNames.PACKAGE_RELEASE, + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "uzzer" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] == CI.BuildNames.FUZZERS, + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "Docker" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + in ( + CI.BuildNames.PACKAGE_RELEASE, + CI.BuildNames.PACKAGE_AARCH64, + ), + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "SQLTest" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + == CI.BuildNames.PACKAGE_RELEASE, + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "Jepsen" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + in ( + CI.BuildNames.PACKAGE_RELEASE, + CI.BuildNames.BINARY_RELEASE, + ), + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif job in ( + CI.JobNames.STYLE_CHECK, + CI.JobNames.FAST_TEST, + CI.JobNames.BUILD_CHECK, + CI.JobNames.DOCS_CHECK, + CI.JobNames.BUGFIX_VALIDATE, + ): + self.assertTrue(CI.JOB_CONFIGS[job].required_builds is None) + else: + print(f"Job [{job}] required build not checked") def test_job_stage_config(self): - """check runner is provided w/o exception""" - for job in JobNames: - stage = CI_CONFIG.get_job_ci_stage(job) - if job in [ - JobNames.STYLE_CHECK, - JobNames.FAST_TEST, - JobNames.JEPSEN_KEEPER, - JobNames.BUILD_CHECK, - JobNames.BUILD_CHECK_SPECIAL, - ]: - assert ( - stage == CIStages.NA - ), "These jobs are not in CI stages, must be NA" + """ + check runner is provided w/o exception + """ + # check stages + for job in CI.JobNames: + if job in CI.BuildNames: + self.assertTrue( + CI.get_job_ci_stage(job) + in (CI.WorkflowStages.BUILDS_1, CI.WorkflowStages.BUILDS_2) + ) else: - assert stage != CIStages.NA, f"stage not found for [{job}]" - self.assertIn(stage, CIStages) + if job in ( + CI.JobNames.STYLE_CHECK, + CI.JobNames.FAST_TEST, + CI.JobNames.JEPSEN_SERVER, + CI.JobNames.JEPSEN_KEEPER, + CI.JobNames.BUILD_CHECK, + ): + self.assertEqual( + CI.get_job_ci_stage(job), + CI.WorkflowStages.NA, + msg=f"Stage for [{job}] is not correct", + ) + else: + self.assertTrue( + CI.get_job_ci_stage(job) + in (CI.WorkflowStages.TESTS_1, CI.WorkflowStages.TESTS_3), + msg=f"Stage for [{job}] is not correct", + ) + + def test_job_stage_config_non_blocking(self): + """ + check runner is provided w/o exception + """ + # check stages + for job in CI.JobNames: + if job in CI.BuildNames: + self.assertTrue( + CI.get_job_ci_stage(job) + in 
(CI.WorkflowStages.BUILDS_1, CI.WorkflowStages.BUILDS_2) + ) + else: + if job in ( + CI.JobNames.STYLE_CHECK, + CI.JobNames.FAST_TEST, + CI.JobNames.JEPSEN_SERVER, + CI.JobNames.JEPSEN_KEEPER, + CI.JobNames.BUILD_CHECK, + ): + self.assertEqual( + CI.get_job_ci_stage(job), + CI.WorkflowStages.NA, + msg=f"Stage for [{job}] is not correct", + ) + else: + self.assertTrue( + CI.get_job_ci_stage(job, non_blocking_ci=True) + in (CI.WorkflowStages.TESTS_1, CI.WorkflowStages.TESTS_2), + msg=f"Stage for [{job}] is not correct", + ) + + def test_build_jobs_configs(self): + """ + check build jobs have non-None build_config attribute + check test jobs have None build_config attribute + """ + for job in CI.JobNames: + if job in CI.BuildNames: + self.assertTrue( + isinstance(CI.JOB_CONFIGS[job].build_config, CI.BuildConfig) + ) + else: + self.assertTrue(CI.JOB_CONFIGS[job].build_config is None) + + def test_ci_py_for_pull_request(self): + """ + checks ci.py job configuration + """ + settings = CiSettings() + settings.no_ci_cache = True + settings.ci_sets = [CI.Tags.CI_SET_BUILDS] + settings.include_keywords = [ + "package", + "integration", + "upgrade", + "clickHouse_build_check", + "stateless", + ] + settings.exclude_keywords = ["asan", "aarch64"] + pr_info = PRInfo(github_event=_TEST_EVENT_JSON) + # make it pull request info + pr_info.event_type = EventType.PULL_REQUEST + pr_info.number = 12345 + assert pr_info.is_pr and not pr_info.is_release and not pr_info.is_master + assert not pr_info.is_merge_queue + ci_cache = CIPY._configure_jobs( + S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True + ) + actual_jobs_to_do = list(ci_cache.jobs_to_do) + expected_jobs_to_do = [] + for set_ in settings.ci_sets: + tag_config = CI.get_tag_config(set_) + assert tag_config + set_jobs = tag_config.run_jobs + for job in set_jobs: + if any(k in normalize_string(job) for k in settings.exclude_keywords): + continue + expected_jobs_to_do.append(job) + for job, config in CI.JOB_CONFIGS.items(): + if not any( + keyword in normalize_string(job) + for keyword in settings.include_keywords + ): + continue + if any( + keyword in normalize_string(job) + for keyword in settings.exclude_keywords + ): + continue + if config.random_bucket: + continue + if job not in expected_jobs_to_do: + expected_jobs_to_do.append(job) + + random_buckets = [] + for job, config in ci_cache.jobs_to_do.items(): + if config.random_bucket: + self.assertTrue( + config.random_bucket not in random_buckets, + "Only one job must be picked up from each random bucket", + ) + random_buckets.append(config.random_bucket) + actual_jobs_to_do.remove(job) + + self.assertCountEqual(expected_jobs_to_do, actual_jobs_to_do) + + def test_ci_py_for_pull_request_no_settings(self): + """ + checks ci.py job configuration in PR with empty ci_settings + """ + settings = CiSettings() + settings.no_ci_cache = True + pr_info = PRInfo(github_event=_TEST_EVENT_JSON) + # make it pull request info + pr_info.event_type = EventType.PULL_REQUEST + pr_info.number = 12345 + assert pr_info.is_pr and not pr_info.is_release and not pr_info.is_master + assert not pr_info.is_merge_queue + ci_cache = CIPY._configure_jobs( + S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True + ) + actual_jobs_to_do = list(ci_cache.jobs_to_do) + expected_jobs_to_do = [] + for job, config in CI.JOB_CONFIGS.items(): + if config.random_bucket: + continue + if config.release_only: + continue + if config.run_by_label: + continue + expected_jobs_to_do.append(job) + + random_buckets = [] + for job, 
config in ci_cache.jobs_to_do.items(): + if config.random_bucket: + self.assertTrue( + config.random_bucket not in random_buckets, + "Only one job must be picked up from each random bucket", + ) + random_buckets.append(config.random_bucket) + actual_jobs_to_do.remove(job) + + self.assertCountEqual(expected_jobs_to_do, actual_jobs_to_do) + + def test_ci_py_for_master(self): + """ + checks ci.py job configuration + """ + settings = CiSettings() + settings.no_ci_cache = True + pr_info = PRInfo(github_event=_TEST_EVENT_JSON) + pr_info.event_type = EventType.PUSH + assert pr_info.number == 0 and pr_info.is_release and not pr_info.is_merge_queue + ci_cache = CIPY._configure_jobs( + S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True + ) + actual_jobs_to_do = list(ci_cache.jobs_to_do) + expected_jobs_to_do = [] + for job, config in CI.JOB_CONFIGS.items(): + if config.pr_only: + continue + if config.run_by_label: + continue + if job in CI.MQ_JOBS: + continue + expected_jobs_to_do.append(job) + self.assertCountEqual(expected_jobs_to_do, actual_jobs_to_do) + + def test_ci_py_for_merge_queue(self): + """ + checks ci.py job configuration + """ + settings = CiSettings() + settings.no_ci_cache = True + pr_info = PRInfo(github_event=_TEST_EVENT_JSON) + # make it merge_queue + pr_info.event_type = EventType.MERGE_QUEUE + assert ( + pr_info.number == 0 + and pr_info.is_merge_queue + and not pr_info.is_release + and not pr_info.is_master + and not pr_info.is_pr + ) + ci_cache = CIPY._configure_jobs( + S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True + ) + actual_jobs_to_do = list(ci_cache.jobs_to_do) + expected_jobs_to_do = [ + "Style check", + "Fast test", + "binary_release", + "Unit tests (release)", + ] + self.assertCountEqual(expected_jobs_to_do, actual_jobs_to_do) + + def test_ci_py_await(self): + """ + checks ci.py job configuration + """ + settings = CiSettings() + settings.no_ci_cache = True + pr_info = PRInfo(github_event=_TEST_EVENT_JSON) + pr_info.event_type = EventType.PUSH + pr_info.number = 0 + assert pr_info.is_release and not pr_info.is_merge_queue + ci_cache = CIPY._configure_jobs( + S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True + ) + self.assertTrue(not ci_cache.jobs_to_skip, "Must be no jobs in skip list") + all_jobs_in_wf = list(ci_cache.jobs_to_do) + assert not ci_cache.jobs_to_wait + ci_cache.await_pending_jobs(is_release=pr_info.is_release, dry_run=True) + assert not ci_cache.jobs_to_skip + assert not ci_cache.jobs_to_wait + + # pretend there are pending jobs that we neet to wait + ci_cache.jobs_to_wait = dict(ci_cache.jobs_to_do) + for job, config in ci_cache.jobs_to_wait.items(): + assert not config.pending_batches + assert config.batches + config.pending_batches = list(config.batches) + for job, config in ci_cache.jobs_to_wait.items(): + for batch in range(config.num_batches): + record = CiCache.Record( + record_type=CiCache.RecordType.PENDING, + job_name=job, + job_digest=ci_cache.job_digests[job], + batch=batch, + num_batches=config.num_batches, + release_branch=True, + ) + for record_t_, records_ in ci_cache.records.items(): + if record_t_.value == CiCache.RecordType.PENDING.value: + records_[record.to_str_key()] = record + + def _test_await_for_batch( + ci_cache: CiCache, record_type: CiCache.RecordType, batch: int + ) -> None: + assert ci_cache.jobs_to_wait + for job_, config_ in ci_cache.jobs_to_wait.items(): + record = CiCache.Record( + record_type=record_type, + job_name=job_, + job_digest=ci_cache.job_digests[job_], + batch=batch, + 
num_batches=config_.num_batches, + release_branch=True, + ) + for record_t_, records_ in ci_cache.records.items(): + if record_t_.value == record_type.value: + records_[record.to_str_key()] = record + # await + ci_cache.await_pending_jobs(is_release=pr_info.is_release, dry_run=True) + for _, config_ in ci_cache.jobs_to_wait.items(): + assert config_.pending_batches + if ( + record_type != CiCache.RecordType.PENDING + and batch < config_.num_batches + ): + assert batch not in config_.pending_batches + else: + assert batch in config_.pending_batches + + for _, config_ in ci_cache.jobs_to_do.items(): + # jobs to do must have batches to run before/after await + # if it's an empty list after await - apparently job has not been removed after await + assert config_.batches + + _test_await_for_batch(ci_cache, CiCache.RecordType.SUCCESSFUL, 0) + # check all one-batch jobs are in jobs_to_skip + for job in all_jobs_in_wf: + config = CI.JOB_CONFIGS[job] + if config.num_batches == 1: + self.assertTrue(job in ci_cache.jobs_to_skip) + self.assertTrue(job not in ci_cache.jobs_to_do) + else: + self.assertTrue(job not in ci_cache.jobs_to_skip) + self.assertTrue(job in ci_cache.jobs_to_do) + + _test_await_for_batch(ci_cache, CiCache.RecordType.FAILED, 1) + _test_await_for_batch(ci_cache, CiCache.RecordType.SUCCESSFUL, 2) + + self.assertTrue(len(ci_cache.jobs_to_skip) > 0) + self.assertTrue(len(ci_cache.jobs_to_do) > 0) + self.assertCountEqual( + list(ci_cache.jobs_to_do) + ci_cache.jobs_to_skip, all_jobs_in_wf + ) diff --git a/tests/ci/test_ci_options.py b/tests/ci/test_ci_options.py index 60888932803..3f158e79f30 100644 --- a/tests/ci/test_ci_options.py +++ b/tests/ci/test_ci_options.py @@ -4,7 +4,7 @@ import unittest from ci_settings import CiSettings -from ci_config import JobConfig +from ci_config import CI _TEST_BODY_1 = """ #### Run only: @@ -19,6 +19,7 @@ _TEST_BODY_1 = """ #### CI options: - [ ] do not test (only style check) +- [x] Woolen Wolfdog CI - [x] disable merge-commit (no merge from master before tests) - [ ] disable CI cache (job reuse) @@ -64,8 +65,8 @@ _TEST_JOB_LIST = [ "fuzzers", "Docker server image", "Docker keeper image", - "Install packages (amd64)", - "Install packages (arm64)", + "Install packages (release)", + "Install packages (aarch64)", "Stateless tests (debug)", "Stateless tests (release)", "Stateless tests (coverage)", @@ -120,19 +121,18 @@ _TEST_JOB_LIST = [ "AST fuzzer (ubsan)", "ClickHouse Keeper Jepsen", "ClickHouse Server Jepsen", - "Performance Comparison", - "Performance Comparison Aarch64", + "Performance Comparison (release)", + "Performance Comparison (aarch64)", "Sqllogic test (release)", "SQLancer (release)", "SQLancer (debug)", "SQLTest", - "Compatibility check (amd64)", + "Compatibility check (release)", "Compatibility check (aarch64)", - "ClickBench (amd64)", + "ClickBench (release)", "ClickBench (aarch64)", "libFuzzer tests", - "ClickHouse build check", - "ClickHouse special build check", + "Builds", "Docs check", "Bugfix validation", ] @@ -148,6 +148,7 @@ class TestCIOptions(unittest.TestCase): self.assertFalse(ci_options.do_not_test) self.assertFalse(ci_options.no_ci_cache) self.assertTrue(ci_options.no_merge_commit) + self.assertTrue(ci_options.woolen_wolfdog) self.assertEqual(ci_options.ci_sets, ["ci_set_non_required"]) self.assertCountEqual(ci_options.include_keywords, ["foo", "foo_bar"]) self.assertCountEqual(ci_options.exclude_keywords, ["foo", "foo_bar"]) @@ -157,6 +158,7 @@ class TestCIOptions(unittest.TestCase): ci_options = 
CiSettings.create_from_pr_message( _TEST_BODY_2, update_from_api=False ) + self.assertFalse(ci_options.woolen_wolfdog) self.assertCountEqual( ci_options.include_keywords, ["integration", "foo_bar", "stateless", "azure"], @@ -166,7 +168,10 @@ class TestCIOptions(unittest.TestCase): ["tsan", "foobar", "aarch64", "analyzer", "s3_storage", "coverage"], ) - jobs_configs = {job: JobConfig() for job in _TEST_JOB_LIST} + jobs_configs = { + job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) + for job in _TEST_JOB_LIST + } jobs_configs[ "fuzzers" ].run_by_label = ( @@ -210,7 +215,10 @@ class TestCIOptions(unittest.TestCase): ) def test_options_applied_2(self): - jobs_configs = {job: JobConfig() for job in _TEST_JOB_LIST_2} + jobs_configs = { + job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) + for job in _TEST_JOB_LIST_2 + } jobs_configs["Style check"].release_only = True jobs_configs["Fast test"].pr_only = True jobs_configs["fuzzers"].run_by_label = "TEST_LABEL" @@ -252,7 +260,10 @@ class TestCIOptions(unittest.TestCase): def test_options_applied_3(self): ci_settings = CiSettings() ci_settings.include_keywords = ["Style"] - jobs_configs = {job: JobConfig() for job in _TEST_JOB_LIST_2} + jobs_configs = { + job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) + for job in _TEST_JOB_LIST_2 + } jobs_configs["Style check"].release_only = True jobs_configs["Fast test"].pr_only = True # no settings are set @@ -296,7 +307,10 @@ class TestCIOptions(unittest.TestCase): ) self.assertCountEqual(ci_options.include_keywords, ["analyzer"]) self.assertIsNone(ci_options.exclude_keywords) - jobs_configs = {job: JobConfig() for job in _TEST_JOB_LIST} + jobs_configs = { + job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) + for job in _TEST_JOB_LIST + } jobs_configs[ "fuzzers" ].run_by_label = "TEST_LABEL" # check "fuzzers" does not appears in the result diff --git a/tests/ci/worker/prepare-ci-ami.sh b/tests/ci/worker/prepare-ci-ami.sh index 3e2f33c89d1..eb410ddcb00 100644 --- a/tests/ci/worker/prepare-ci-ami.sh +++ b/tests/ci/worker/prepare-ci-ami.sh @@ -9,7 +9,7 @@ set -xeuo pipefail echo "Running prepare script" export DEBIAN_FRONTEND=noninteractive -export RUNNER_VERSION=2.316.1 +export RUNNER_VERSION=2.317.0 export RUNNER_HOME=/home/ubuntu/actions-runner deb_arch() { @@ -54,7 +54,8 @@ apt-get install --yes --no-install-recommends \ python3-dev \ python3-pip \ qemu-user-static \ - unzip + unzip \ + gh # Install docker curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg @@ -101,7 +102,7 @@ sudo -u ubuntu docker buildx version sudo -u ubuntu docker buildx rm default-builder || : # if it's the second attempt sudo -u ubuntu docker buildx create --use --name default-builder -pip install boto3 pygithub requests urllib3 unidiff dohq-artifactory +pip install boto3 pygithub requests urllib3 unidiff dohq-artifactory jwt rm -rf $RUNNER_HOME # if it's the second attempt mkdir -p $RUNNER_HOME && cd $RUNNER_HOME @@ -212,9 +213,9 @@ chmod +x /usr/local/share/scripts/init-network.sh touch /var/tmp/clickhouse-ci-ami.success # END OF THE SCRIPT -# TOE description +# TOE (Task Orchestrator and Executor) description # name: CIInfrastructurePrepare -# description: instals the infrastructure for ClickHouse CI runners +# description: installs the infrastructure for ClickHouse CI runners # schemaVersion: 1.0 # # phases: diff --git a/tests/config/install.sh b/tests/config/install.sh index e04392d893b..08ee11a7407 100755 --- a/tests/config/install.sh 
+++ b/tests/config/install.sh @@ -183,13 +183,7 @@ elif [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" == "1" ]]; then fi if [[ -n "$EXPORT_S3_STORAGE_POLICIES" ]]; then - if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then - echo "Azure configuration will not be added" - else - echo "Adding azure configuration" - ln -sf $SRC_PATH/config.d/azure_storage_conf.xml $DEST_SERVER_PATH/config.d/ - fi - + ln -sf $SRC_PATH/config.d/azure_storage_conf.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/storage_conf.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/storage_conf_02944.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/storage_conf_02963.xml $DEST_SERVER_PATH/config.d/ diff --git a/tests/instructions/easy_tasks_sorted_ru.md b/tests/instructions/easy_tasks_sorted_ru.md index bc95e6b1c37..fbd86ebf08f 100644 --- a/tests/instructions/easy_tasks_sorted_ru.md +++ b/tests/instructions/easy_tasks_sorted_ru.md @@ -78,7 +78,7 @@ Upd: сделали по-другому: теперь всё безопасно. ## LEFT ONLY JOIN -## Функции makeDate, makeDateTime. +## + Функции makeDate, makeDateTime. `makeDate(year, month, day)` `makeDateTime(year, month, day, hour, minute, second, [timezone])` @@ -187,13 +187,13 @@ https://clickhouse.com/docs/en/operations/table_engines/external_data/ Не работает, если открыть clickhouse-client в интерактивном режиме и делать несколько запросов. -## + Настройка для возможности получить частичный результат при cancel-е. +## Настройка для возможности получить частичный результат при cancel-е. Хотим по Ctrl+C получить те данные, которые успели обработаться. ## Раскрытие кортежей в функциях высшего порядка. -## Табличная функция loop. +## + Табличная функция loop. `SELECT * FROM loop(database, table)` diff --git a/tests/integration/compose/docker_compose_ldap.yml b/tests/integration/compose/docker_compose_ldap.yml index f199516f315..1f50b34735d 100644 --- a/tests/integration/compose/docker_compose_ldap.yml +++ b/tests/integration/compose/docker_compose_ldap.yml @@ -15,7 +15,10 @@ services: ports: - ${LDAP_EXTERNAL_PORT:-1389}:${LDAP_INTERNAL_PORT:-1389} healthcheck: - test: "ldapsearch -x -b dc=example,dc=org cn > /dev/null" + test: > + ldapsearch -x -H ldap://localhost:$$LDAP_PORT_NUMBER -D $$LDAP_ADMIN_DN -w $$LDAP_ADMIN_PASSWORD -b $$LDAP_ROOT + | grep -c -E "member: cn=j(ohn|ane)doe" + | grep 2 >> /dev/null interval: 10s retries: 10 timeout: 2s diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 41c162217d2..544b06cca1b 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -2640,7 +2640,9 @@ class ClickHouseCluster: [ "bash", "-c", - f"/opt/bitnami/openldap/bin/ldapsearch -x -H ldap://{self.ldap_host}:{self.ldap_port} -D cn=admin,dc=example,dc=org -w clickhouse -b dc=example,dc=org", + f"/opt/bitnami/openldap/bin/ldapsearch -x -H ldap://{self.ldap_host}:{self.ldap_port} -D cn=admin,dc=example,dc=org -w clickhouse -b dc=example,dc=org" + f'| grep -c -E "member: cn=j(ohn|ane)doe"' + f"| grep 2 >> /dev/null", ], user="root", ) diff --git a/tests/integration/helpers/keeper_utils.py b/tests/integration/helpers/keeper_utils.py index 39fa0d0f074..be710db37d1 100644 --- a/tests/integration/helpers/keeper_utils.py +++ b/tests/integration/helpers/keeper_utils.py @@ -124,27 +124,27 @@ class KeeperClient(object): return data def cd(self, path: str, timeout: float = 60.0): - self.execute_query(f"cd {path}", timeout) + self.execute_query(f"cd '{path}'", timeout) def ls(self, path: str, timeout: float = 60.0) -> 
list[str]: - return self.execute_query(f"ls {path}", timeout).split(" ") + return self.execute_query(f"ls '{path}'", timeout).split(" ") def create(self, path: str, value: str, timeout: float = 60.0): - self.execute_query(f"create {path} {value}", timeout) + self.execute_query(f"create '{path}' '{value}'", timeout) def get(self, path: str, timeout: float = 60.0) -> str: - return self.execute_query(f"get {path}", timeout) + return self.execute_query(f"get '{path}'", timeout) def set(self, path: str, value: str, version: tp.Optional[int] = None) -> None: self.execute_query( - f"set {path} {value} {version if version is not None else ''}" + f"set '{path}' '{value}' {version if version is not None else ''}" ) def rm(self, path: str, version: tp.Optional[int] = None) -> None: - self.execute_query(f"rm {path} {version if version is not None else ''}") + self.execute_query(f"rm '{path}' {version if version is not None else ''}") def exists(self, path: str, timeout: float = 60.0) -> bool: - return bool(int(self.execute_query(f"exists {path}", timeout))) + return bool(int(self.execute_query(f"exists '{path}'", timeout))) def stop(self): if not self.stopped: @@ -152,22 +152,22 @@ class KeeperClient(object): self.proc.communicate(b"exit\n", timeout=10.0) def sync(self, path: str, timeout: float = 60.0): - self.execute_query(f"sync {path}", timeout) + self.execute_query(f"sync '{path}'", timeout) def touch(self, path: str, timeout: float = 60.0): - self.execute_query(f"touch {path}", timeout) + self.execute_query(f"touch '{path}'", timeout) def find_big_family(self, path: str, n: int = 10, timeout: float = 60.0) -> str: - return self.execute_query(f"find_big_family {path} {n}", timeout) + return self.execute_query(f"find_big_family '{path}' {n}", timeout) def find_super_nodes(self, threshold: int, timeout: float = 60.0) -> str: return self.execute_query(f"find_super_nodes {threshold}", timeout) def get_direct_children_number(self, path: str, timeout: float = 60.0) -> str: - return self.execute_query(f"get_direct_children_number {path}", timeout) + return self.execute_query(f"get_direct_children_number '{path}'", timeout) def get_all_children_number(self, path: str, timeout: float = 60.0) -> str: - return self.execute_query(f"get_all_children_number {path}", timeout) + return self.execute_query(f"get_all_children_number '{path}'", timeout) def delete_stale_backups(self, timeout: float = 60.0) -> str: return self.execute_query("delete_stale_backups", timeout) @@ -196,7 +196,7 @@ class KeeperClient(object): ) return self.execute_query( - f"reconfig {operation} {joining or leaving or new_members}", timeout + f"reconfig {operation} '{joining or leaving or new_members}'", timeout ) @classmethod diff --git a/tests/integration/test_MemoryTracking/configs/no_system_log.xml b/tests/integration/test_MemoryTracking/configs/no_system_log.xml index 3218dae4dc7..7d80c7fbf78 100644 --- a/tests/integration/test_MemoryTracking/configs/no_system_log.xml +++ b/tests/integration/test_MemoryTracking/configs/no_system_log.xml @@ -5,6 +5,7 @@ + diff --git a/tests/integration/test_asynchronous_metric_jemalloc_profile_active/test.py b/tests/integration/test_asynchronous_metric_jemalloc_profile_active/test.py index a8f4ab05888..b3769a61b3f 100644 --- a/tests/integration/test_asynchronous_metric_jemalloc_profile_active/test.py +++ b/tests/integration/test_asynchronous_metric_jemalloc_profile_active/test.py @@ -7,7 +7,6 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( "node1", 
main_configs=["configs/asynchronous_metrics_update_period_s.xml"], - env_variables={"MALLOC_CONF": "background_thread:true,prof:true"}, ) @@ -29,26 +28,11 @@ def test_asynchronous_metric_jemalloc_profile_active(started_cluster): if node1.is_built_with_sanitizer(): pytest.skip("Disabled for sanitizers") - res_o = node1.query( + res = node1.query( "SELECT * FROM system.asynchronous_metrics WHERE metric ILIKE '%jemalloc.prof.active%' FORMAT Vertical;" ) assert ( - res_o - == """Row 1: -────── -metric: jemalloc.prof.active -value: 1 -description: An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html -""" - ) - # disable - node1.query("SYSTEM JEMALLOC DISABLE PROFILE") - time.sleep(5) - res_t = node1.query( - "SELECT * FROM system.asynchronous_metrics WHERE metric ILIKE '%jemalloc.prof.active%' FORMAT Vertical;" - ) - assert ( - res_t + res == """Row 1: ────── metric: jemalloc.prof.active @@ -58,16 +42,31 @@ description: An internal metric of the low-level memory allocator (jemalloc). Se ) # enable node1.query("SYSTEM JEMALLOC ENABLE PROFILE") - time.sleep(5) - res_f = node1.query( + node1.query("SYSTEM RELOAD ASYNCHRONOUS METRICS") + res = node1.query( "SELECT * FROM system.asynchronous_metrics WHERE metric ILIKE '%jemalloc.prof.active%' FORMAT Vertical;" ) assert ( - res_f + res == """Row 1: ────── metric: jemalloc.prof.active value: 1 description: An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html +""" + ) + # disable + node1.query("SYSTEM JEMALLOC DISABLE PROFILE") + node1.query("SYSTEM RELOAD ASYNCHRONOUS METRICS") + res = node1.query( + "SELECT * FROM system.asynchronous_metrics WHERE metric ILIKE '%jemalloc.prof.active%' FORMAT Vertical;" + ) + assert ( + res + == """Row 1: +────── +metric: jemalloc.prof.active +value: 0 +description: An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html """ ) diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index c67f63e3f6b..d8662fad011 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -1,5 +1,4 @@ import pytest -import asyncio import glob import re import random @@ -1486,6 +1485,7 @@ def test_backup_all(exclude_system_log_tables): "processors_profile_log", "asynchronous_insert_log", "backup_log", + "error_log", ] exclude_from_backup += ["system." 
+ table_name for table_name in log_tables] diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 967ed6a221c..d53335000a6 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -627,67 +627,126 @@ def test_user_specific_auth(start_cluster): create_user("superuser2") create_user("regularuser") - node.query("CREATE TABLE specific_auth (col UInt64) ENGINE=Memory") + node.query("CREATE TABLE specific_auth (col UInt64) ENGINE=MergeTree ORDER BY col") + node.query("INSERT INTO specific_auth VALUES (1)") - assert "Access" in node.query_and_get_error( - "BACKUP TABLE specific_auth TO S3('http://minio1:9001/root/data/backups/limited/backup1.zip')" + def backup_restore(backup, user, should_fail, on_cluster=False, base_backup=None): + on_cluster_clause = "ON CLUSTER 'cluster'" if on_cluster else "" + base_backup = ( + f" SETTINGS base_backup = {base_backup}" if base_backup is not None else "" + ) + backup_query = ( + f"BACKUP TABLE specific_auth {on_cluster_clause} TO {backup} {base_backup}" + ) + restore_query = f"RESTORE TABLE specific_auth {on_cluster_clause} FROM {backup}" + + if should_fail: + assert "Access" in node.query_and_get_error(backup_query, user=user) + else: + node.query(backup_query, user=user) + node.query("DROP TABLE specific_auth SYNC") + node.query(restore_query, user=user) + + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup1/')", + user=None, + should_fail=True, ) - assert "Access" in node.query_and_get_error( - "BACKUP TABLE specific_auth TO S3('http://minio1:9001/root/data/backups/limited/backup1.zip')", + + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup1/')", user="regularuser", + should_fail=True, ) - node.query( - "BACKUP TABLE specific_auth TO S3('http://minio1:9001/root/data/backups/limited/backup1.zip')", - user="superuser1", - ) - node.query( - "RESTORE TABLE specific_auth FROM S3('http://minio1:9001/root/data/backups/limited/backup1.zip')", + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup1/')", user="superuser1", + should_fail=False, ) - node.query( - "BACKUP TABLE specific_auth TO S3('http://minio1:9001/root/data/backups/limited/backup2.zip')", - user="superuser2", - ) - node.query( - "RESTORE TABLE specific_auth FROM S3('http://minio1:9001/root/data/backups/limited/backup2.zip')", + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup2/')", user="superuser2", + should_fail=False, ) assert "Access" in node.query_and_get_error( - "RESTORE TABLE specific_auth FROM S3('http://minio1:9001/root/data/backups/limited/backup1.zip')", + "RESTORE TABLE specific_auth FROM S3('http://minio1:9001/root/data/backups/limited/backup1/')", user="regularuser", ) - assert "HTTP response code: 403" in node.query_and_get_error( - "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1.zip', 'RawBLOB')", + node.query("INSERT INTO specific_auth VALUES (2)") + + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup1_inc/')", user="regularuser", + should_fail=True, + base_backup="S3('http://minio1:9001/root/data/backups/limited/backup1/')", ) - node.query( - "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1.zip', 'RawBLOB')", + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup1_inc/')", user="superuser1", + should_fail=False, + 
base_backup="S3('http://minio1:9001/root/data/backups/limited/backup1/')", + ) + + assert "Access" in node.query_and_get_error( + "RESTORE TABLE specific_auth FROM S3('http://minio1:9001/root/data/backups/limited/backup1_inc/')", + user="regularuser", ) assert "Access Denied" in node.query_and_get_error( - "BACKUP TABLE specific_auth ON CLUSTER 'cluster' TO S3('http://minio1:9001/root/data/backups/limited/backup3/')", + "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1/*', 'RawBLOB')", user="regularuser", ) node.query( - "BACKUP TABLE specific_auth ON CLUSTER 'cluster' TO S3('http://minio1:9001/root/data/backups/limited/backup3/')", + "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1/*', 'RawBLOB')", user="superuser1", ) + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup3/')", + user="regularuser", + should_fail=True, + on_cluster=True, + ) + + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup3/')", + user="superuser1", + should_fail=False, + on_cluster=True, + ) + assert "Access Denied" in node.query_and_get_error( "RESTORE TABLE specific_auth ON CLUSTER 'cluster' FROM S3('http://minio1:9001/root/data/backups/limited/backup3/')", user="regularuser", ) - node.query( - "RESTORE TABLE specific_auth ON CLUSTER 'cluster' FROM S3('http://minio1:9001/root/data/backups/limited/backup3/')", + node.query("INSERT INTO specific_auth VALUES (3)") + + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup3_inc/')", + user="regularuser", + should_fail=True, + on_cluster=True, + base_backup="S3('http://minio1:9001/root/data/backups/limited/backup3/')", + ) + + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup3_inc/')", user="superuser1", + should_fail=False, + on_cluster=True, + base_backup="S3('http://minio1:9001/root/data/backups/limited/backup3/')", + ) + + assert "Access Denied" in node.query_and_get_error( + "RESTORE TABLE specific_auth ON CLUSTER 'cluster' FROM S3('http://minio1:9001/root/data/backups/limited/backup3_inc/')", + user="regularuser", ) assert "Access Denied" in node.query_and_get_error( diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index b3b8bf86800..1ed70e20b79 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -512,7 +512,6 @@ def test_when_s3_connection_reset_by_peer_at_create_mpu_retried( ), error -@pytest.mark.skip(reason="test is flaky, waiting ClickHouse/issues/64451") def test_query_is_canceled_with_inf_retries(cluster, broken_s3): node = cluster.instances["node_with_inf_s3_retries"] @@ -534,11 +533,12 @@ def test_query_is_canceled_with_inf_retries(cluster, broken_s3): SELECT * FROM system.numbers - LIMIT 1000000 + LIMIT 1000000000 SETTINGS s3_max_single_part_upload_size=100, s3_min_upload_part_size=10000, - s3_check_objects_after_upload=0 + s3_check_objects_after_upload=0, + s3_max_inflight_parts_for_one_file=1000 """, query_id=insert_query_id, ) diff --git a/tests/integration/test_composable_protocols/configs/config.xml b/tests/integration/test_composable_protocols/configs/config.xml index 09a512eb5a4..7d39363bc62 100644 --- a/tests/integration/test_composable_protocols/configs/config.xml +++ b/tests/integration/test_composable_protocols/configs/config.xml @@ -58,6 +58,26 @@ 8444 https protocol endpoint + + tls + http + 0.0.0.0 + 8445 + https protocol with TLSv1_2 
minimum version + sslv2,sslv3,tlsv1,tlsv1_1 + /etc/clickhouse-server/config.d/server.crt + /etc/clickhouse-server/config.d/server.key + + + tls + http + 0.0.0.0 + 8446 + https protocol with TLSv1_3 minimum version + sslv2,sslv3,tlsv1,tlsv1_1,tlsv1_2 + /etc/clickhouse-server/config.d/server.crt + /etc/clickhouse-server/config.d/server.key + diff --git a/tests/integration/test_composable_protocols/test.py b/tests/integration/test_composable_protocols/test.py index aa5a1e766e6..241d1505433 100644 --- a/tests/integration/test_composable_protocols/test.py +++ b/tests/integration/test_composable_protocols/test.py @@ -7,6 +7,7 @@ from helpers.client import Client import urllib.request, urllib.parse import subprocess import socket +import warnings SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -27,18 +28,34 @@ def setup_nodes(): cluster.shutdown() -def execute_query_https(host, port, query): +def execute_query_https(host, port, query, version=None): url = f"https://{host}:{port}/?query={urllib.parse.quote(query)}" ctx = ssl.create_default_context() ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE + if version: + ctx.minimum_version = version + ctx.maximum_version = version request = urllib.request.Request(url) response = urllib.request.urlopen(request, context=ctx).read() return response.decode("utf-8") +def execute_query_https_unsupported(host, port, query, version=None): + try: + execute_query_https(host, port, query, version) + except Exception as e: + e_text = str(e) + if "NO_PROTOCOLS_AVAILABLE" in e_text: + return True + if "TLSV1_ALERT_PROTOCOL_VERSION" in e_text: + return True + raise + return False + + def execute_query_http(host, port, query): url = f"http://{host}:{port}/?query={urllib.parse.quote(query)}" @@ -84,6 +101,49 @@ def test_connections(): assert execute_query_https(server.ip_address, 8444, "SELECT 1") == "1\n" + warnings.filterwarnings("ignore", category=DeprecationWarning) + + assert execute_query_https_unsupported( + server.ip_address, 8445, "SELECT 1", version=ssl.TLSVersion.SSLv3 + ) + assert execute_query_https_unsupported( + server.ip_address, 8445, "SELECT 1", version=ssl.TLSVersion.TLSv1 + ) + assert execute_query_https_unsupported( + server.ip_address, 8445, "SELECT 1", version=ssl.TLSVersion.TLSv1_1 + ) + assert ( + execute_query_https( + server.ip_address, 8445, "SELECT 1", version=ssl.TLSVersion.TLSv1_2 + ) + == "1\n" + ) + assert ( + execute_query_https( + server.ip_address, 8445, "SELECT 1", version=ssl.TLSVersion.TLSv1_3 + ) + == "1\n" + ) + + assert execute_query_https_unsupported( + server.ip_address, 8446, "SELECT 1", version=ssl.TLSVersion.SSLv3 + ) + assert execute_query_https_unsupported( + server.ip_address, 8446, "SELECT 1", version=ssl.TLSVersion.TLSv1 + ) + assert execute_query_https_unsupported( + server.ip_address, 8446, "SELECT 1", version=ssl.TLSVersion.TLSv1_1 + ) + assert execute_query_https_unsupported( + server.ip_address, 8446, "SELECT 1", version=ssl.TLSVersion.TLSv1_2 + ) + assert ( + execute_query_https( + server.ip_address, 8446, "SELECT 1", version=ssl.TLSVersion.TLSv1_3 + ) + == "1\n" + ) + data = "PROXY TCP4 255.255.255.255 255.255.255.255 65535 65535\r\n\0\021ClickHouse client\024\r\253\251\003\0\007default\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\21ClickHouse client\024\r\253\251\003\0\001\0\0\0\002\001\025SELECT 'Hello, world'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0" assert ( netcat(server.ip_address, 9100, 
bytearray(data, "latin-1")).find( diff --git a/tests/integration/test_config_reloader_interval/__init__.py b/tests/integration/test_config_reloader_interval/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_config_reloader_interval/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_config_reloader_interval/configs/config_reloader.xml b/tests/integration/test_config_reloader_interval/configs/config_reloader.xml new file mode 100644 index 00000000000..1dc9a59bd9d --- /dev/null +++ b/tests/integration/test_config_reloader_interval/configs/config_reloader.xml @@ -0,0 +1,4 @@ + + + 1000 + diff --git a/tests/integration/test_config_reloader_interval/test.py b/tests/integration/test_config_reloader_interval/test.py new file mode 100644 index 00000000000..22b66ecac30 --- /dev/null +++ b/tests/integration/test_config_reloader_interval/test.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 + +import pytest +import fnmatch + +from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", + main_configs=["configs/config_reloader.xml"], +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_reload_config(start_cluster): + assert node.wait_for_log_line( + f"Config reload interval set to 1000ms", look_behind_lines=2000 + ) + + assert ( + node.query( + "SELECT value from system.server_settings where name = 'config_reload_interval_ms'" + ) + == "1000\n" + ) + node.replace_in_config( + "/etc/clickhouse-server/config.d/config_reloader.xml", + "1000", + "7777", + ) + + assert node.wait_for_log_line( + f"Config reload interval changed to 7777ms", look_behind_lines=2000 + ) + + assert ( + node.query( + "SELECT value from system.server_settings where name = 'config_reload_interval_ms'" + ) + == "7777\n" + ) diff --git a/tests/integration/test_config_xml_full/configs/config.d/error_log.xml b/tests/integration/test_config_xml_full/configs/config.d/error_log.xml new file mode 100644 index 00000000000..903d8699f5c --- /dev/null +++ b/tests/integration/test_config_xml_full/configs/config.d/error_log.xml @@ -0,0 +1,8 @@ + + + system + error_log
+ 7500 + 1000 +
+
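The error_log entries introduced above follow the same pattern as the other periodic system log tables. As a rough sketch only (not part of this patch), the new table could be exercised with the same integration-test helpers used elsewhere in this diff; the trigger query and the expected error name are assumptions for illustration:

    import time
    from helpers.client import QueryRuntimeException

    def check_error_log_populated(node):
        # Provoke a known error; the exception itself is expected here.
        try:
            node.query("SELECT throwIf(1, 'expected failure')")
        except QueryRuntimeException:
            pass
        # error_log is collected periodically, so poll a few times after flushing.
        count = 0
        for _ in range(10):
            node.query("SYSTEM FLUSH LOGS")
            count = int(node.query(
                "SELECT count() FROM system.error_log "
                "WHERE error = 'FUNCTION_THROW_IF_VALUE_IS_NON_ZERO'"
            ))
            if count:
                break
            time.sleep(1)
        assert count >= 1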
diff --git a/tests/integration/test_config_xml_full/configs/config.xml b/tests/integration/test_config_xml_full/configs/config.xml index 628e1432350..61aa0a5c724 100644 --- a/tests/integration/test_config_xml_full/configs/config.xml +++ b/tests/integration/test_config_xml_full/configs/config.xml @@ -756,6 +756,14 @@ 1000
+ + + system + error_log
+ 7500 + 1000 +
+ + diff --git a/tests/integration/test_scheduler/configs/resources.xml.default b/tests/integration/test_scheduler/configs/resources.xml.default new file mode 100644 index 00000000000..3b003a17557 --- /dev/null +++ b/tests/integration/test_scheduler/configs/resources.xml.default @@ -0,0 +1,76 @@ + + + + inflight_limit1000000 + priority + fifo0 + fair1 + fifo9 + fifo1 + fair90 + fifo + fifo + fifo9 + fifo9 + fifo9 + fifo9 + + + inflight_limit1000000 + priority + fifo0 + fair1 + fifo9 + fifo1 + fair90 + fifo + fifo + fifo9 + fifo9 + fifo9 + fifo9 + + + + + /prio/admin + /prio/admin + + + /prio/fair/prod + /prio/fair/prod + + + /prio/fair/dev + /prio/fair/dev + + + /prio/fair/dev + /prio/fair/dev + + + /prio/fair/sys/merges + /prio/fair/sys/merges + + + /prio/fair/sys/mutations + /prio/fair/sys/mutations + + + /prio/fair/prod_merges + /prio/fair/prod_merges + + + /prio/fair/prod_mutations + /prio/fair/prod_mutations + + + /prio/fair/dev_merges + /prio/fair/dev_merges + + + /prio/fair/dev_mutations + /prio/fair/dev_mutations + + + diff --git a/tests/integration/test_scheduler/configs/scheduler.xml b/tests/integration/test_scheduler/configs/scheduler.xml deleted file mode 100644 index 523ba1a5a98..00000000000 --- a/tests/integration/test_scheduler/configs/scheduler.xml +++ /dev/null @@ -1,62 +0,0 @@ - - - - - s3 - http://minio1:9001/root/data/ - minio - minio123 - 33554432 - 10 - 10 - network_read - network_write - - - - - -

- s3 -
- - - - - - - inflight_limit1000000 - priority - fifo0 - fair1 - fifo9 - fifo1 - - - inflight_limit1000000 - priority - fifo0 - fair1 - fifo9 - fifo1 - - - - - /prio/admin - /prio/admin - - - /prio/fair/prod - /prio/fair/prod - - - /prio/fair/dev - /prio/fair/dev - - - /prio/fair/dev - /prio/fair/dev - - -
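The scheduler tests further down all repeat one measurement pattern: read the dequeued_requests counter for a scheduler path from system.scheduler, run some IO-heavy work, and assert that the counter grew. A short helper in the same spirit (a sketch only; the resource and path names are the ones those tests use) makes the pattern explicit:

    def dequeued(node, resource, path):
        # dequeued_requests is a monotonically increasing counter of IO
        # requests served through the given scheduler node.
        return int(
            node.query(
                f"select dequeued_requests from system.scheduler "
                f"where resource='{resource}' and path='{path}'"
            ).strip()
        )

    def assert_workload_uses_path(node, path, run_workload):
        reads_before = dequeued(node, "network_read", path)
        writes_before = dequeued(node, "network_write", path)
        run_workload()  # e.g. a few INSERTs followed by OPTIMIZE TABLE ... FINAL
        assert dequeued(node, "network_read", path) > reads_before
        assert dequeued(node, "network_write", path) > writes_before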
diff --git a/tests/integration/test_scheduler/configs/storage_configuration.xml b/tests/integration/test_scheduler/configs/storage_configuration.xml new file mode 100644 index 00000000000..823a00a05de --- /dev/null +++ b/tests/integration/test_scheduler/configs/storage_configuration.xml @@ -0,0 +1,26 @@ + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + 33554432 + 10 + 10 + network_read + network_write + + + + + +
+ s3 +
+
+
+
+
+
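Besides the server-wide defaults added in workloads.xml.default, merge and mutation traffic can be routed per table, which is what the override tests below rely on. A condensed sketch of that per-table form, combining both settings on one table (table and workload names are the ones those tests assume):

    def create_table_with_workload_overrides(node):
        # Background merges and mutations of this table are attributed to
        # dedicated workloads instead of the defaults from
        # workloads.xml.default (sys_merges / sys_mutations).
        node.query(
            """
            drop table if exists prod_data;
            create table prod_data (key UInt64 CODEC(NONE))
            engine=MergeTree() order by tuple()
            settings min_bytes_for_wide_part=1e9, storage_policy='s3',
                     merge_workload='prod_merges',
                     mutation_workload='prod_mutations';
            """
        )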
diff --git a/tests/integration/test_scheduler/configs/workloads.xml b/tests/integration/test_scheduler/configs/workloads.xml new file mode 100644 index 00000000000..197bf660500 --- /dev/null +++ b/tests/integration/test_scheduler/configs/workloads.xml @@ -0,0 +1,3 @@ + + + diff --git a/tests/integration/test_scheduler/configs/workloads.xml.default b/tests/integration/test_scheduler/configs/workloads.xml.default new file mode 100644 index 00000000000..f010993335d --- /dev/null +++ b/tests/integration/test_scheduler/configs/workloads.xml.default @@ -0,0 +1,4 @@ + + sys_merges + sys_mutations + diff --git a/tests/integration/test_scheduler/test.py b/tests/integration/test_scheduler/test.py index 8e37bd8d403..cde75c244e8 100644 --- a/tests/integration/test_scheduler/test.py +++ b/tests/integration/test_scheduler/test.py @@ -14,7 +14,13 @@ cluster = ClickHouseCluster(__file__) node = cluster.add_instance( "node", stay_alive=True, - main_configs=["configs/scheduler.xml"], + main_configs=[ + "configs/storage_configuration.xml", + "configs/resources.xml", + "configs/resources.xml.default", + "configs/workloads.xml", + "configs/workloads.xml.default", + ], with_minio=True, ) @@ -28,6 +34,41 @@ def start_cluster(): cluster.shutdown() +@pytest.fixture(scope="function", autouse=True) +def set_default_configs(): + node.exec_in_container( + [ + "bash", + "-c", + "cp /etc/clickhouse-server/config.d/resources.xml.default /etc/clickhouse-server/config.d/resources.xml", + ] + ) + node.exec_in_container( + [ + "bash", + "-c", + "cp /etc/clickhouse-server/config.d/workloads.xml.default /etc/clickhouse-server/config.d/workloads.xml", + ] + ) + node.query("system reload config") + yield + + +def update_workloads_config(**settings): + xml = "" + for name in settings: + xml += f"<{name}>{settings[name]}" + print(xml) + node.exec_in_container( + [ + "bash", + "-c", + f"echo '{xml}' > /etc/clickhouse-server/config.d/workloads.xml", + ] + ) + node.query("system reload config") + + def test_s3_disk(): node.query( f""" @@ -111,3 +152,302 @@ def test_s3_disk(): ) == "1\n" ) + + +def test_merge_workload(): + node.query( + f""" + drop table if exists data; + create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9, storage_policy='s3'; + """ + ) + + reads_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/sys/merges'" + ).strip() + ) + writes_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/sys/merges'" + ).strip() + ) + + node.query(f"insert into data select * from numbers(1e4)") + node.query(f"insert into data select * from numbers(2e4)") + node.query(f"insert into data select * from numbers(3e4)") + node.query(f"optimize table data final") + + reads_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/sys/merges'" + ).strip() + ) + writes_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/sys/merges'" + ).strip() + ) + + assert reads_before < reads_after + assert writes_before < writes_after + + +def test_merge_workload_override(): + node.query( + f""" + drop table if exists prod_data; + drop table if exists dev_data; + create table prod_data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9, 
storage_policy='s3', merge_workload='prod_merges'; + create table dev_data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9, storage_policy='s3', merge_workload='dev_merges'; + """ + ) + + prod_reads_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/prod_merges'" + ).strip() + ) + prod_writes_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/prod_merges'" + ).strip() + ) + dev_reads_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/dev_merges'" + ).strip() + ) + dev_writes_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/dev_merges'" + ).strip() + ) + + node.query(f"insert into prod_data select * from numbers(1e4)") + node.query(f"insert into prod_data select * from numbers(2e4)") + node.query(f"insert into prod_data select * from numbers(3e4)") + node.query(f"insert into dev_data select * from numbers(1e4)") + node.query(f"insert into dev_data select * from numbers(2e4)") + node.query(f"insert into dev_data select * from numbers(3e4)") + node.query(f"optimize table prod_data final") + node.query(f"optimize table dev_data final") + + prod_reads_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/prod_merges'" + ).strip() + ) + prod_writes_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/prod_merges'" + ).strip() + ) + dev_reads_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/dev_merges'" + ).strip() + ) + dev_writes_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/dev_merges'" + ).strip() + ) + + assert prod_reads_before < prod_reads_after + assert prod_writes_before < prod_writes_after + assert dev_reads_before < dev_reads_after + assert dev_writes_before < dev_writes_after + + +def test_mutate_workload(): + node.query( + f""" + drop table if exists data; + create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9, storage_policy='s3'; + """ + ) + + node.query(f"insert into data select * from numbers(1e4)") + node.query(f"optimize table data final") + + reads_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/sys/mutations'" + ).strip() + ) + writes_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/sys/mutations'" + ).strip() + ) + + node.query(f"alter table data update key = 1 where key = 42") + node.query(f"optimize table data final") + + reads_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/sys/mutations'" + ).strip() + ) + writes_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/sys/mutations'" + ).strip() + ) + + assert reads_before < reads_after + assert writes_before < writes_after + + +def test_mutation_workload_override(): + 
node.query( + f""" + drop table if exists prod_data; + drop table if exists dev_data; + create table prod_data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9, storage_policy='s3', mutation_workload='prod_mutations'; + create table dev_data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9, storage_policy='s3', mutation_workload='dev_mutations'; + """ + ) + + node.query(f"insert into prod_data select * from numbers(1e4)") + node.query(f"optimize table prod_data final") + node.query(f"insert into dev_data select * from numbers(1e4)") + node.query(f"optimize table dev_data final") + + prod_reads_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/prod_mutations'" + ).strip() + ) + prod_writes_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/prod_mutations'" + ).strip() + ) + dev_reads_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/dev_mutations'" + ).strip() + ) + dev_writes_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/dev_mutations'" + ).strip() + ) + + node.query(f"alter table prod_data update key = 1 where key = 42") + node.query(f"optimize table prod_data final") + node.query(f"alter table dev_data update key = 1 where key = 42") + node.query(f"optimize table dev_data final") + + prod_reads_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/prod_mutations'" + ).strip() + ) + prod_writes_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/prod_mutations'" + ).strip() + ) + dev_reads_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/dev_mutations'" + ).strip() + ) + dev_writes_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/dev_mutations'" + ).strip() + ) + + assert prod_reads_before < prod_reads_after + assert prod_writes_before < prod_writes_after + assert dev_reads_before < dev_reads_after + assert dev_writes_before < dev_writes_after + + +def test_merge_workload_change(): + node.query( + f""" + drop table if exists data; + create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9, storage_policy='s3'; + """ + ) + + for env in ["prod", "dev"]: + update_workloads_config(merge_workload=f"{env}_merges") + + reads_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/{env}_merges'" + ).strip() + ) + writes_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/{env}_merges'" + ).strip() + ) + + node.query(f"insert into data select * from numbers(1e4)") + node.query(f"insert into data select * from numbers(2e4)") + node.query(f"insert into data select * from numbers(3e4)") + node.query(f"optimize table data final") + + reads_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and 
path='/prio/fair/{env}_merges'" + ).strip() + ) + writes_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/{env}_merges'" + ).strip() + ) + + assert reads_before < reads_after + assert writes_before < writes_after + + +def test_mutation_workload_change(): + node.query( + f""" + drop table if exists data; + create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9, storage_policy='s3'; + """ + ) + + for env in ["prod", "dev"]: + update_workloads_config(mutation_workload=f"{env}_mutations") + + node.query(f"insert into data select * from numbers(1e4)") + node.query(f"optimize table data final") + + reads_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/{env}_mutations'" + ).strip() + ) + writes_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/{env}_mutations'" + ).strip() + ) + + node.query(f"alter table data update key = 1 where key = 42") + node.query(f"optimize table data final") + + reads_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/{env}_mutations'" + ).strip() + ) + writes_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/{env}_mutations'" + ).strip() + ) + + assert reads_before < reads_after + assert writes_before < writes_after diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index d986c1f9746..20b004a7605 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -10,13 +10,10 @@ import threading import time from azure.storage.blob import BlobServiceClient -import helpers.client import pytest from helpers.cluster import ClickHouseCluster, ClickHouseInstance -from helpers.network import PartitionManager -from helpers.mock_servers import start_mock_servers -from helpers.test_tools import exec_query_with_retry from helpers.test_tools import assert_logs_contain_with_retry +from helpers.test_tools import TSV @pytest.fixture(scope="module") @@ -1236,7 +1233,7 @@ def test_filtering_by_file_or_path(cluster): node.query("SYSTEM FLUSH LOGS") result = node.query( - f"SELECT ProfileEvents['EngineFileLikeReadFiles'] FROM system.query_log WHERE query like '%select%azure%test_filter%' AND type='QueryFinish'" + f"SELECT ProfileEvents['EngineFileLikeReadFiles'] FROM system.query_log WHERE query ilike '%select%azure%test_filter%' AND type='QueryFinish'" ) assert int(result) == 1 @@ -1431,3 +1428,37 @@ def test_respect_object_existence_on_partitioned_write(cluster): ) assert int(result) == 44 + + +def test_insert_create_new_file(cluster): + node = cluster.instances["node"] + storage_account_url = cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"] + account_name = "devstoreaccount1" + account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_create_new_file.csv', '{account_name}', '{account_key}', 'a UInt64') VALUES (1)", + settings={ + "azure_truncate_on_insert": False, + "azure_create_new_file_on_insert": True, + }, + ) + + 
azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_create_new_file.csv', '{account_name}', '{account_key}', 'a UInt64') VALUES (2)", + settings={ + "azure_truncate_on_insert": False, + "azure_create_new_file_on_insert": True, + }, + ) + + res = azure_query( + node, + f"SELECT _file, * FROM azureBlobStorage('{storage_account_url}', 'cont', 'test_create_new_file*', '{account_name}', '{account_key}', 'a UInt64') ORDER BY a", + ) + + assert TSV(res) == TSV( + "test_create_new_file.csv\t1\ntest_create_new_file.1.csv\t2\n" + ) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index b2ebd12ce00..9a0cb352088 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -2196,6 +2196,12 @@ def test_filtering_by_file_or_path(started_cluster): assert int(result) == 1 + assert 0 == int( + instance.query( + f"select count() from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_filter*.tsv') where _file = 'kek'" + ) + ) + def test_union_schema_inference_mode(started_cluster): bucket = started_cluster.minio_bucket diff --git a/tests/integration/test_storage_s3_queue/configs/merge_tree.xml b/tests/integration/test_storage_s3_queue/configs/merge_tree.xml new file mode 100644 index 00000000000..61eba8face7 --- /dev/null +++ b/tests/integration/test_storage_s3_queue/configs/merge_tree.xml @@ -0,0 +1,5 @@ + + + 0 + + diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index 66631c51b03..b93e560d5b9 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -12,6 +12,7 @@ import json AVAILABLE_MODES = ["unordered", "ordered"] DEFAULT_AUTH = ["'minio'", "'minio123'"] NO_AUTH = ["NOSIGN"] +AZURE_CONTAINER_NAME = "cont" def prepare_public_s3_bucket(started_cluster): @@ -84,6 +85,7 @@ def started_cluster(): "instance", user_configs=["configs/users.xml"], with_minio=True, + with_azurite=True, with_zookeeper=True, main_configs=[ "configs/zookeeper.xml", @@ -110,11 +112,27 @@ def started_cluster(): with_installed_binary=True, use_old_analyzer=True, ) + cluster.add_instance( + "instance_too_many_parts", + user_configs=["configs/users.xml"], + with_minio=True, + with_zookeeper=True, + main_configs=[ + "configs/s3queue_log.xml", + "configs/merge_tree.xml", + ], + stay_alive=True, + ) logging.info("Starting cluster...") cluster.start() logging.info("Cluster started") + container_client = cluster.blob_service_client.get_container_client( + AZURE_CONTAINER_NAME + ) + container_client.create_container() + yield cluster finally: cluster.shutdown() @@ -134,6 +152,7 @@ def generate_random_files( started_cluster, files_path, count, + storage="s3", column_num=3, row_num=10, start_ind=0, @@ -155,7 +174,10 @@ def generate_random_files( values_csv = ( "\n".join((",".join(map(str, row)) for row in rand_values)) + "\n" ).encode() - put_s3_file_content(started_cluster, filename, values_csv, bucket) + if storage == "s3": + put_s3_file_content(started_cluster, filename, values_csv, bucket) + else: + put_azure_file_content(started_cluster, filename, values_csv, bucket) return total_values @@ -165,12 +187,21 @@ def put_s3_file_content(started_cluster, filename, data, bucket=None): started_cluster.minio_client.put_object(bucket, filename, buf, len(data)) +def put_azure_file_content(started_cluster, filename, data, bucket=None): + client = 
started_cluster.blob_service_client.get_blob_client( + AZURE_CONTAINER_NAME, filename + ) + buf = io.BytesIO(data) + client.upload_blob(buf, "BlockBlob", len(data)) + + def create_table( started_cluster, node, table_name, mode, files_path, + engine_name="S3Queue", format="column1 UInt32, column2 UInt32, column3 UInt32", additional_settings={}, file_format="CSV", @@ -189,11 +220,17 @@ def create_table( } settings.update(additional_settings) - url = f"http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{files_path}/" + engine_def = None + if engine_name == "S3Queue": + url = f"http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{files_path}/" + engine_def = f"{engine_name}('{url}', {auth_params}, {file_format})" + else: + engine_def = f"{engine_name}('{started_cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', '{files_path}/', 'CSV')" + node.query(f"DROP TABLE IF EXISTS {table_name}") create_query = f""" CREATE TABLE {table_name} ({format}) - ENGINE = S3Queue('{url}', {auth_params}, {file_format}) + ENGINE = {engine_def} SETTINGS {",".join((k+"="+repr(v) for k, v in settings.items()))} """ @@ -224,17 +261,22 @@ def create_mv( ) -@pytest.mark.parametrize("mode", AVAILABLE_MODES) -def test_delete_after_processing(started_cluster, mode): +@pytest.mark.parametrize("mode", ["unordered", "ordered"]) +@pytest.mark.parametrize("engine_name", ["S3Queue", "AzureQueue"]) +def test_delete_after_processing(started_cluster, mode, engine_name): node = started_cluster.instances["instance"] - table_name = f"test.delete_after_processing_{mode}" + table_name = f"test.delete_after_processing_{mode}_{engine_name}" dst_table_name = f"{table_name}_dst" files_path = f"{table_name}_data" files_num = 5 row_num = 10 + if engine_name == "S3Queue": + storage = "s3" + else: + storage = "azure" total_values = generate_random_files( - started_cluster, files_path, files_num, row_num=row_num + started_cluster, files_path, files_num, row_num=row_num, storage=storage ) create_table( started_cluster, @@ -243,6 +285,7 @@ def test_delete_after_processing(started_cluster, mode): mode, files_path, additional_settings={"after_processing": "delete"}, + engine_name=engine_name, ) create_mv(node, table_name, dst_table_name) @@ -263,15 +306,24 @@ def test_delete_after_processing(started_cluster, mode): ).splitlines() ] == sorted(total_values, key=lambda x: (x[0], x[1], x[2])) - minio = started_cluster.minio_client - objects = list(minio.list_objects(started_cluster.minio_bucket, recursive=True)) - assert len(objects) == 0 + if engine_name == "S3Queue": + minio = started_cluster.minio_client + objects = list(minio.list_objects(started_cluster.minio_bucket, recursive=True)) + assert len(objects) == 0 + else: + client = started_cluster.blob_service_client.get_container_client( + AZURE_CONTAINER_NAME + ) + objects_iterator = client.list_blobs(files_path) + for objects in objects_iterator: + assert False -@pytest.mark.parametrize("mode", AVAILABLE_MODES) -def test_failed_retry(started_cluster, mode): +@pytest.mark.parametrize("mode", ["unordered", "ordered"]) +@pytest.mark.parametrize("engine_name", ["S3Queue", "AzureQueue"]) +def test_failed_retry(started_cluster, mode, engine_name): node = started_cluster.instances["instance"] - table_name = f"test.failed_retry_{mode}" + table_name = f"test.failed_retry_{mode}_{engine_name}" dst_table_name = f"{table_name}_dst" files_path = f"{table_name}_data" file_path = f"{files_path}/trash_test.csv" @@ -284,7 +336,10 @@ def 
test_failed_retry(started_cluster, mode): values_csv = ( "\n".join((",".join(map(str, row)) for row in values)) + "\n" ).encode() - put_s3_file_content(started_cluster, file_path, values_csv) + if engine_name == "S3Queue": + put_s3_file_content(started_cluster, file_path, values_csv) + else: + put_azure_file_content(started_cluster, file_path, values_csv) create_table( started_cluster, @@ -296,6 +351,7 @@ def test_failed_retry(started_cluster, mode): "s3queue_loading_retries": retries_num, "keeper_path": keeper_path, }, + engine_name=engine_name, ) create_mv(node, table_name, dst_table_name) @@ -352,6 +408,7 @@ def test_direct_select_file(started_cluster, mode): files_path, additional_settings={ "keeper_path": keeper_path, + "s3queue_processing_threads_num": 1, }, ) @@ -379,6 +436,7 @@ def test_direct_select_file(started_cluster, mode): files_path, additional_settings={ "keeper_path": keeper_path, + "s3queue_processing_threads_num": 1, }, ) @@ -397,6 +455,7 @@ def test_direct_select_file(started_cluster, mode): files_path, additional_settings={ "keeper_path": keeper_path, + "s3queue_processing_threads_num": 1, }, ) @@ -778,10 +837,12 @@ def test_max_set_age(started_cluster): files_path, additional_settings={ "keeper_path": keeper_path, - "s3queue_tracked_file_ttl_sec": max_age, - "s3queue_cleanup_interval_min_ms": 0, - "s3queue_cleanup_interval_max_ms": 0, - "s3queue_loading_retries": 0, + "tracked_file_ttl_sec": max_age, + "cleanup_interval_min_ms": max_age / 3, + "cleanup_interval_max_ms": max_age / 3, + "loading_retries": 0, + "processing_threads_num": 1, + "loading_retries": 0, }, ) create_mv(node, table_name, dst_table_name) @@ -806,7 +867,7 @@ def test_max_set_age(started_cluster): assert expected_rows == get_count() assert 10 == int(node.query(f"SELECT uniq(_path) from {dst_table_name}")) - time.sleep(max_age + 1) + time.sleep(max_age + 5) expected_rows = 20 @@ -830,7 +891,7 @@ def test_max_set_age(started_cluster): failed_count = int( node.query( - "SELECT value FROM system.events WHERE name = 'S3QueueFailedFiles' SETTINGS system_events_show_zero_values=1" + "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" ) ) @@ -845,7 +906,7 @@ def test_max_set_age(started_cluster): for _ in range(30): if failed_count + 1 == int( node.query( - "SELECT value FROM system.events WHERE name = 'S3QueueFailedFiles' SETTINGS system_events_show_zero_values=1" + "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" ) ): break @@ -853,7 +914,7 @@ def test_max_set_age(started_cluster): assert failed_count + 1 == int( node.query( - "SELECT value FROM system.events WHERE name = 'S3QueueFailedFiles' SETTINGS system_events_show_zero_values=1" + "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" ) ) @@ -861,6 +922,11 @@ def test_max_set_age(started_cluster): assert "Cannot parse input" in node.query( "SELECT exception FROM system.s3queue WHERE file_name ilike '%fff.csv'" ) + assert 1 == int( + node.query( + "SELECT count() FROM system.s3queue_log WHERE file_name ilike '%fff.csv'" + ) + ) assert 1 == int( node.query( "SELECT count() FROM system.s3queue_log WHERE file_name ilike '%fff.csv' AND notEmpty(exception)" @@ -870,14 +936,16 @@ def test_max_set_age(started_cluster): time.sleep(max_age + 1) assert failed_count + 2 == int( - node.query("SELECT value FROM system.events WHERE name = 'S3QueueFailedFiles'") 
+ node.query( + "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles'" + ) ) node.query("SYSTEM FLUSH LOGS") assert "Cannot parse input" in node.query( "SELECT exception FROM system.s3queue WHERE file_name ilike '%fff.csv' ORDER BY processing_end_time DESC LIMIT 1" ) - assert 2 == int( + assert 1 < int( node.query( "SELECT count() FROM system.s3queue_log WHERE file_name ilike '%fff.csv' AND notEmpty(exception)" ) @@ -1284,7 +1352,7 @@ def test_shards_distributed(started_cluster, mode, processing_threads): def get_count(node, table_name): return int(run_query(node, f"SELECT count() FROM {table_name}")) - for _ in range(10): + for _ in range(30): if ( get_count(node, dst_table_name) + get_count(node_2, dst_table_name) ) == total_rows: @@ -1414,7 +1482,7 @@ def test_settings_check(started_cluster): ) assert ( - "Existing table metadata in ZooKeeper differs in s3queue_buckets setting. Stored in ZooKeeper: 2, local: 3" + "Existing table metadata in ZooKeeper differs in buckets setting. Stored in ZooKeeper: 2, local: 3" in create_table( started_cluster, node_2, @@ -1577,3 +1645,156 @@ def test_upgrade(started_cluster): node.restart_with_latest_version() assert expected_rows == get_count() + + +def test_exception_during_insert(started_cluster): + node = started_cluster.instances["instance_too_many_parts"] + + table_name = f"test_exception_during_insert" + dst_table_name = f"{table_name}_dst" + keeper_path = f"/clickhouse/test_{table_name}" + files_path = f"{table_name}_data" + files_to_generate = 10 + + create_table( + started_cluster, + node, + table_name, + "unordered", + files_path, + additional_settings={ + "keeper_path": keeper_path, + }, + ) + total_values = generate_random_files( + started_cluster, files_path, files_to_generate, start_ind=0, row_num=1 + ) + + create_mv(node, table_name, dst_table_name) + + node.wait_for_log_line( + "Failed to process data: Code: 252. 
DB::Exception: Too many parts" + ) + + time.sleep(2) + exception = node.query( + f"SELECT exception FROM system.s3queue WHERE zookeeper_path ilike '%{table_name}%' and notEmpty(exception)" + ) + assert "Too many parts" in exception + + node.replace_in_config( + "/etc/clickhouse-server/config.d/merge_tree.xml", + "parts_to_throw_insert>0", + "parts_to_throw_insert>10", + ) + node.restart_clickhouse() + + def get_count(): + return int(node.query(f"SELECT count() FROM {dst_table_name}")) + + expected_rows = 10 + for _ in range(20): + if expected_rows == get_count(): + break + time.sleep(1) + assert expected_rows == get_count() + + +def test_commit_on_limit(started_cluster): + node = started_cluster.instances["instance"] + + table_name = f"test_commit_on_limit" + dst_table_name = f"{table_name}_dst" + keeper_path = f"/clickhouse/test_{table_name}" + files_path = f"{table_name}_data" + files_to_generate = 10 + + create_table( + started_cluster, + node, + table_name, + "ordered", + files_path, + additional_settings={ + "keeper_path": keeper_path, + "s3queue_processing_threads_num": 1, + "s3queue_loading_retries": 0, + "s3queue_max_processed_files_before_commit": 10, + }, + ) + total_values = generate_random_files( + started_cluster, files_path, files_to_generate, start_ind=0, row_num=1 + ) + + incorrect_values = [ + ["failed", 1, 1], + ] + incorrect_values_csv = ( + "\n".join((",".join(map(str, row)) for row in incorrect_values)) + "\n" + ).encode() + + correct_values = [ + [1, 1, 1], + ] + correct_values_csv = ( + "\n".join((",".join(map(str, row)) for row in correct_values)) + "\n" + ).encode() + + put_s3_file_content( + started_cluster, f"{files_path}/test_99.csv", correct_values_csv + ) + put_s3_file_content( + started_cluster, f"{files_path}/test_999.csv", correct_values_csv + ) + put_s3_file_content( + started_cluster, f"{files_path}/test_9999.csv", incorrect_values_csv + ) + put_s3_file_content( + started_cluster, f"{files_path}/test_99999.csv", correct_values_csv + ) + put_s3_file_content( + started_cluster, f"{files_path}/test_999999.csv", correct_values_csv + ) + + create_mv(node, table_name, dst_table_name) + + def get_processed_files(): + return ( + node.query( + f"SELECT file_name FROM system.s3queue WHERE zookeeper_path ilike '%{table_name}%' and status = 'Processed' and rows_processed > 0 " + ) + .strip() + .split("\n") + ) + + def get_failed_files(): + return ( + node.query( + f"SELECT file_name FROM system.s3queue WHERE zookeeper_path ilike '%{table_name}%' and status = 'Failed'" + ) + .strip() + .split("\n") + ) + + for _ in range(30): + if "test_999999.csv" in get_processed_files(): + break + time.sleep(1) + assert "test_999999.csv" in get_processed_files() + + assert 1 == int( + node.query( + "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" + ) + ) + + expected_processed = ["test_" + str(i) + ".csv" for i in range(files_to_generate)] + processed = get_processed_files() + for value in expected_processed: + assert value in processed + + expected_failed = ["test_9999.csv"] + failed = get_failed_files() + for value in expected_failed: + assert value not in processed + assert value in failed diff --git a/tests/integration/test_system_flush_logs/test.py b/tests/integration/test_system_flush_logs/test.py index 084d342d736..2022f9d4a89 100644 --- a/tests/integration/test_system_flush_logs/test.py +++ b/tests/integration/test_system_flush_logs/test.py @@ -2,7 +2,6 @@ # pylint: disable=unused-argument # pylint: 
disable=redefined-outer-name -import time import pytest from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_eq_with_retry @@ -22,6 +21,7 @@ system_logs = [ ("system.part_log", 1), ("system.trace_log", 1), ("system.metric_log", 1), + ("system.error_log", 1), ] diff --git a/tests/integration/test_system_logs_recreate/test.py b/tests/integration/test_system_logs_recreate/test.py index 2e8a0e4e877..1bdb1fe3261 100644 --- a/tests/integration/test_system_logs_recreate/test.py +++ b/tests/integration/test_system_logs_recreate/test.py @@ -30,6 +30,7 @@ def test_system_logs_recreate(): "part_log", "trace_log", "metric_log", + "error_log", ] node.query("SYSTEM FLUSH LOGS") diff --git a/tests/integration/test_table_db_num_limit/__init__.py b/tests/integration/test_table_db_num_limit/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_table_db_num_limit/config/config.xml b/tests/integration/test_table_db_num_limit/config/config.xml new file mode 100644 index 00000000000..9a573b158fe --- /dev/null +++ b/tests/integration/test_table_db_num_limit/config/config.xml @@ -0,0 +1,5 @@ + + 10 + 10 + + diff --git a/tests/integration/test_table_db_num_limit/test.py b/tests/integration/test_table_db_num_limit/test.py new file mode 100644 index 00000000000..aa8030b077c --- /dev/null +++ b/tests/integration/test_table_db_num_limit/test.py @@ -0,0 +1,43 @@ +import pytest +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance( + "node1", main_configs=["config/config.xml"], with_zookeeper=True +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + + +def test_table_db_limit(started_cluster): + for i in range(10): + node1.query("create database db{}".format(i)) + + with pytest.raises(QueryRuntimeException) as exp_info: + node1.query("create database db_exp".format(i)) + + assert "TOO_MANY_DATABASES" in str(exp_info) + + for i in range(10): + node1.query("create table t{} (a Int32) Engine = Log".format(i)) + + node1.query("system flush logs") + for i in range(10): + node1.query("drop table t{}".format(i)) + for i in range(10): + node1.query("create table t{} (a Int32) Engine = Log".format(i)) + + with pytest.raises(QueryRuntimeException) as exp_info: + node1.query("create table default.tx (a Int32) Engine = Log") + assert "TOO_MANY_TABLES" in str(exp_info) diff --git a/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_load_balancing2.xml b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_load_balancing2.xml new file mode 100644 index 00000000000..fd416cad505 --- /dev/null +++ b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_load_balancing2.xml @@ -0,0 +1,35 @@ + + + + random + + 1 + + + 0 + 1 + + + + zoo1 + 2181 + az1 + + + zoo2 + 2181 + az2 + + + zoo3 + 2181 + az3 + + 3000 + + + + 0 + az2 + + diff --git a/tests/integration/test_zookeeper_config_load_balancing/test.py b/tests/integration/test_zookeeper_config_load_balancing/test.py index f17e0c3f03f..9cdf7db2b08 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test.py @@ -1,6 +1,8 @@ +import time import pytest from helpers.cluster import ClickHouseCluster from helpers.network import PartitionManager +from helpers.test_tools import 
assert_eq_with_retry cluster = ClickHouseCluster( __file__, zookeeper_config_path="configs/zookeeper_load_balancing.xml" @@ -17,6 +19,10 @@ node3 = cluster.add_instance( "nod3", with_zookeeper=True, main_configs=["configs/zookeeper_load_balancing.xml"] ) +node4 = cluster.add_instance( + "nod4", with_zookeeper=True, main_configs=["configs/zookeeper_load_balancing2.xml"] +) + def change_balancing(old, new, reload=True): line = "{}<" @@ -405,113 +411,57 @@ def test_hostname_levenshtein_distance(started_cluster): def test_round_robin(started_cluster): pm = PartitionManager() try: - pm._add_rule( - { - "source": node1.ip_address, - "destination": cluster.get_instance_ip("zoo1"), - "action": "REJECT --reject-with tcp-reset", - } - ) - pm._add_rule( - { - "source": node2.ip_address, - "destination": cluster.get_instance_ip("zoo1"), - "action": "REJECT --reject-with tcp-reset", - } - ) - pm._add_rule( - { - "source": node3.ip_address, - "destination": cluster.get_instance_ip("zoo1"), - "action": "REJECT --reject-with tcp-reset", - } - ) change_balancing("random", "round_robin") - - print( - str( - node1.exec_in_container( - [ - "bash", - "-c", - "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", - ], - privileged=True, - user="root", - ) + for node in [node1, node2, node3]: + idx = int( + node.query("select index from system.zookeeper_connection").strip() ) - ) - assert ( - "1" - == str( - node1.exec_in_container( - [ - "bash", - "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", - ], - privileged=True, - user="root", - ) - ).strip() - ) + new_idx = (idx + 1) % 3 - print( - str( - node2.exec_in_container( - [ - "bash", - "-c", - "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", - ], - privileged=True, - user="root", - ) + pm._add_rule( + { + "source": node.ip_address, + "destination": cluster.get_instance_ip("zoo" + str(idx + 1)), + "action": "REJECT --reject-with tcp-reset", + } ) - ) - assert ( - "1" - == str( - node2.exec_in_container( - [ - "bash", - "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", - ], - privileged=True, - user="root", - ) - ).strip() - ) - print( - str( - node3.exec_in_container( - [ - "bash", - "-c", - "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", - ], - privileged=True, - user="root", - ) + assert_eq_with_retry( + node, + "select index from system.zookeeper_connection", + str(new_idx) + "\n", ) - ) - assert ( - "1" - == str( - node3.exec_in_container( - [ - "bash", - "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", - ], - privileged=True, - user="root", - ) - ).strip() - ) - + pm.heal_all() finally: pm.heal_all() change_balancing("round_robin", "random", reload=False) + + +def test_az(started_cluster): + pm = PartitionManager() + try: + # make sure it disconnects from the optimal node + pm._add_rule( + { + "source": node4.ip_address, + "destination": cluster.get_instance_ip("zoo2"), + "action": "REJECT --reject-with tcp-reset", + } + ) + + node4.query_with_retry("select * from system.zookeeper where path='/'") + assert "az2\n" != node4.query( + "select availability_zone from system.zookeeper_connection" + ) + + # fallback_session_lifetime.max is 1 second, but it shouldn't drop current session until the node 
becomes available + + time.sleep(5) # this is fine + assert 5 <= int(node4.query("select zookeeperSessionUptime()").strip()) + + pm.heal_all() + assert_eq_with_retry( + node4, "select availability_zone from system.zookeeper_connection", "az2\n" + ) + finally: + pm.heal_all() diff --git a/tests/integration/test_zookeeper_fallback_session/test.py b/tests/integration/test_zookeeper_fallback_session/test.py index 9afabfa3da3..932bbe482d2 100644 --- a/tests/integration/test_zookeeper_fallback_session/test.py +++ b/tests/integration/test_zookeeper_fallback_session/test.py @@ -84,10 +84,28 @@ def test_fallback_session(started_cluster: ClickHouseCluster): ) # at this point network partitioning has been reverted. - # the nodes should switch to zoo1 automatically because of `in_order` load-balancing. + # the nodes should switch to zoo1 because of `in_order` load-balancing. # otherwise they would connect to a random replica + + # but there's no reason to reconnect because current session works + # and there's no "optimal" node with `in_order` load-balancing + # so we need to break the current session + for node in [node1, node2, node3]: - assert_uses_zk_node(node, "zoo1") + assert_uses_zk_node(node, "zoo3") + + with PartitionManager() as pm: + for node in started_cluster.instances.values(): + pm._add_rule( + { + "source": node.ip_address, + "destination": cluster.get_instance_ip("zoo3"), + "action": "REJECT --reject-with tcp-reset", + } + ) + + for node in [node1, node2, node3]: + assert_uses_zk_node(node, "zoo1") node1.query_with_retry("INSERT INTO simple VALUES ({0}, {0})".format(2)) for node in [node2, node3]: diff --git a/tests/jepsen.clickhouse/resources/keeper_config.xml b/tests/jepsen.clickhouse/resources/keeper_config.xml index 6bc4ad89839..b5c0aac6a1b 100644 --- a/tests/jepsen.clickhouse/resources/keeper_config.xml +++ b/tests/jepsen.clickhouse/resources/keeper_config.xml @@ -33,6 +33,7 @@ 9181 {id} + 1 10000 diff --git a/tests/jepsen.clickhouse/resources/keeper_config_solo.xml b/tests/jepsen.clickhouse/resources/keeper_config_solo.xml index 0054cad8f85..6896beb9a4d 100644 --- a/tests/jepsen.clickhouse/resources/keeper_config_solo.xml +++ b/tests/jepsen.clickhouse/resources/keeper_config_solo.xml @@ -11,6 +11,7 @@ 9181 1 + 1 10000 diff --git a/tests/performance/scripts/compare.sh b/tests/performance/scripts/compare.sh index 9a0fb5b335c..cb56ab6c5bf 100755 --- a/tests/performance/scripts/compare.sh +++ b/tests/performance/scripts/compare.sh @@ -87,6 +87,7 @@ function configure --path db0 --user_files_path db0/user_files --top_level_domains_path "$(left_or_right right top_level_domains)" + --keeper_server.storage_path coordination0 --tcp_port $LEFT_SERVER_PORT ) left/clickhouse-server "${setup_left_server_opts[@]}" &> setup-server-log.log & @@ -113,8 +114,12 @@ function configure rm -r db0/preprocessed_configs ||: rm -r db0/{data,metadata}/system ||: rm db0/status ||: + cp -al db0/ left/db/ + cp -R coordination0 left/coordination + cp -al db0/ right/db/ + cp -R coordination0 right/coordination } function restart @@ -135,6 +140,7 @@ function restart --tcp_port $LEFT_SERVER_PORT --keeper_server.tcp_port $LEFT_SERVER_KEEPER_PORT --keeper_server.raft_configuration.server.port $LEFT_SERVER_KEEPER_RAFT_PORT + --keeper_server.storage_path left/coordination --zookeeper.node.port $LEFT_SERVER_KEEPER_PORT --interserver_http_port $LEFT_SERVER_INTERSERVER_PORT ) @@ -154,6 +160,7 @@ function restart --tcp_port $RIGHT_SERVER_PORT --keeper_server.tcp_port $RIGHT_SERVER_KEEPER_PORT 
--keeper_server.raft_configuration.server.port $RIGHT_SERVER_KEEPER_RAFT_PORT + --keeper_server.storage_path right/coordination --zookeeper.node.port $RIGHT_SERVER_KEEPER_PORT --interserver_http_port $RIGHT_SERVER_INTERSERVER_PORT ) diff --git a/tests/queries/0_stateless/00137_in_constants.reference b/tests/queries/0_stateless/00137_in_constants.reference index 379885fb1ab..94607ffa924 100644 --- a/tests/queries/0_stateless/00137_in_constants.reference +++ b/tests/queries/0_stateless/00137_in_constants.reference @@ -13,6 +13,7 @@ 1 1 1 +1 0 0 0 diff --git a/tests/queries/0_stateless/00137_in_constants.sql b/tests/queries/0_stateless/00137_in_constants.sql index 297acc4ef26..bc365523be1 100644 --- a/tests/queries/0_stateless/00137_in_constants.sql +++ b/tests/queries/0_stateless/00137_in_constants.sql @@ -13,6 +13,7 @@ SELECT 'Hello' IN (SELECT 'Hello'); SELECT materialize('Hello') IN (SELECT 'Hello'); SELECT 'Hello' IN (SELECT materialize('Hello')); SELECT materialize('Hello') IN (SELECT materialize('Hello')); +SELECT toDate('2020-01-01') IN (toDateTime('2020-01-01', 'UTC')); SELECT 2 IN (SELECT 1); SELECT materialize(2) IN (SELECT 1); diff --git a/tests/queries/0_stateless/00515_enhanced_time_zones.sql b/tests/queries/0_stateless/00515_enhanced_time_zones.sql index 837b0b4be20..f7eb90fa5c8 100644 --- a/tests/queries/0_stateless/00515_enhanced_time_zones.sql +++ b/tests/queries/0_stateless/00515_enhanced_time_zones.sql @@ -1,3 +1,5 @@ +SET allow_deprecated_snowflake_conversion_functions = 1; + SELECT addMonths(toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul'), 1, 'Asia/Kolkata'); SELECT addMonths(toDateTime('2017-11-05 10:37:47', 'Asia/Kolkata'), 1); SELECT addMonths(toTimeZone(toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul'), 'Asia/Kolkata'), 1); diff --git a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh index 1bb4dbd34de..af746c43da9 100755 --- a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh +++ b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage +# Tags: long, no-s3-storage, no-tsan # no-s3 because read FileOpen metric set -e @@ -31,6 +31,6 @@ $CLICKHOUSE_CLIENT $settings -q "$touching_many_parts_query" &> /dev/null $CLICKHOUSE_CLIENT $settings -q "SYSTEM FLUSH LOGS" -$CLICKHOUSE_CLIENT $settings -q "SELECT ProfileEvents['FileOpen'] as opened_files FROM system.query_log WHERE query='$touching_many_parts_query' and current_database = currentDatabase() ORDER BY event_time DESC, opened_files DESC LIMIT 1;" +$CLICKHOUSE_CLIENT $settings -q "SELECT ProfileEvents['FileOpen'] as opened_files FROM system.query_log WHERE query = '$touching_many_parts_query' AND current_database = currentDatabase() AND event_date >= yesterday() ORDER BY event_time DESC, opened_files DESC LIMIT 1;" $CLICKHOUSE_CLIENT $settings -q "DROP TABLE IF EXISTS merge_tree_table;" diff --git a/tests/queries/0_stateless/00763_lock_buffer_long.sh b/tests/queries/0_stateless/00763_lock_buffer_long.sh index 50680724149..046e4efaa85 100755 --- a/tests/queries/0_stateless/00763_lock_buffer_long.sh +++ b/tests/queries/0_stateless/00763_lock_buffer_long.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: long +# Tags: long, no-s3-storage, no-msan, no-asan, no-tsan, no-debug +# Some kind of stress test, it doesn't make sense to test in a non-release build set -e @@ -15,7 +16,7 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE 
buffer_00763_2 (s String) ENGINE = Bu function thread1() { - seq 1 500 | sed -r -e 's/.+/DROP TABLE IF EXISTS mt_00763_2; CREATE TABLE mt_00763_2 (s String) ENGINE = MergeTree ORDER BY s; INSERT INTO mt_00763_2 SELECT toString(number) FROM numbers(10);/' | ${CLICKHOUSE_CLIENT} --multiquery --ignore-error ||: + seq 1 500 | sed -r -e 's/.+/DROP TABLE IF EXISTS mt_00763_2; CREATE TABLE mt_00763_2 (s String) ENGINE = MergeTree ORDER BY s; INSERT INTO mt_00763_2 SELECT toString(number) FROM numbers(10);/' | ${CLICKHOUSE_CLIENT} --fsync-metadata 0 --multiquery --ignore-error ||: } function thread2() diff --git a/tests/queries/0_stateless/00830_join_overwrite.reference b/tests/queries/0_stateless/00830_join_overwrite.reference index 4792e70f333..e7d6081b647 100644 --- a/tests/queries/0_stateless/00830_join_overwrite.reference +++ b/tests/queries/0_stateless/00830_join_overwrite.reference @@ -1,2 +1,4 @@ 2 3 +2 +3 diff --git a/tests/queries/0_stateless/00830_join_overwrite.sql b/tests/queries/0_stateless/00830_join_overwrite.sql index cb7e277906b..bc3662528db 100644 --- a/tests/queries/0_stateless/00830_join_overwrite.sql +++ b/tests/queries/0_stateless/00830_join_overwrite.sql @@ -9,5 +9,14 @@ INSERT INTO kv_overwrite VALUES (1, 2); INSERT INTO kv_overwrite VALUES (1, 3); SELECT joinGet('kv_overwrite', 'v', toUInt32(1)); + +CREATE TABLE t2 (k UInt32, v UInt32) ENGINE = Memory; +INSERT INTO t2 VALUES (1, 2), (1, 3); + +SET allow_experimental_analyzer = 1; + +SELECT v FROM (SELECT 1 as k) t1 ANY INNER JOIN t2 USING (k) SETTINGS join_any_take_last_row = 0; +SELECT v FROM (SELECT 1 as k) t1 ANY INNER JOIN t2 USING (k) SETTINGS join_any_take_last_row = 1; + DROP TABLE kv; DROP TABLE kv_overwrite; diff --git a/tests/queries/0_stateless/00987_distributed_stack_overflow.sql b/tests/queries/0_stateless/00987_distributed_stack_overflow.sql index 5a22ac56413..ba58713fe0e 100644 --- a/tests/queries/0_stateless/00987_distributed_stack_overflow.sql +++ b/tests/queries/0_stateless/00987_distributed_stack_overflow.sql @@ -9,10 +9,6 @@ CREATE TABLE distr (x UInt8) ENGINE = Distributed(test_shard_localhost, currentD CREATE TABLE distr0 (x UInt8) ENGINE = Distributed(test_shard_localhost, '', distr0); -- { serverError INFINITE_LOOP } CREATE TABLE distr1 (x UInt8) ENGINE = Distributed(test_shard_localhost, currentDatabase(), distr2); -CREATE TABLE distr2 (x UInt8) ENGINE = Distributed(test_shard_localhost, currentDatabase(), distr1); - -SELECT * FROM distr1; -- { serverError TOO_LARGE_DISTRIBUTED_DEPTH } -SELECT * FROM distr2; -- { serverError TOO_LARGE_DISTRIBUTED_DEPTH } +CREATE TABLE distr2 (x UInt8) ENGINE = Distributed(test_shard_localhost, currentDatabase(), distr1); -- { serverError INFINITE_LOOP } DROP TABLE distr1; -DROP TABLE distr2; diff --git a/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh b/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh index 3a2eac1f38f..26c2e54e637 100755 --- a/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh +++ b/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh @@ -54,10 +54,10 @@ function alter_thread() { for i in {0..5}; do ALTER[$i]="ALTER TABLE mv MODIFY QUERY SELECT v == 1 as test, v as case FROM src_a;" done - # Insert 3 ALTERs to src_b, one in the first half of the array and two in arbitrary positions. 
- ALTER[$RANDOM % 3]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;" - ALTER[$RANDOM % 6]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;" - ALTER[$RANDOM % 6]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;" + # Insert 3 ALTERs to src_b, one randomly in each third of the array. + ALTER[$RANDOM % 2]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;" + ALTER[$RANDOM % 2 + 2]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;" + ALTER[$RANDOM % 2 + 4]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;" i=0 while true; do diff --git a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql index 6268765aa27..bdfbf2a47cf 100644 --- a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql +++ b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql @@ -88,6 +88,7 @@ SELECT sum(n) from rich_syntax; SYSTEM DROP DNS CACHE; DROP TABLE file; +DROP DICTIONARY dict; DROP TABLE url; DROP TABLE view; DROP TABLE buffer; @@ -96,4 +97,3 @@ DROP TABLE merge_tf; DROP TABLE distributed; DROP TABLE distributed_tf; DROP TABLE rich_syntax; -DROP DICTIONARY dict; diff --git a/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.reference b/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.reference index 8984d35930a..03ed07cf1a4 100644 --- a/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.reference +++ b/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.reference @@ -2,6 +2,4 @@ Instruction check fail. The CPU does not support SSSE3 instruction set. Instruction check fail. The CPU does not support SSE4.1 instruction set. Instruction check fail. The CPU does not support SSE4.2 instruction set. Instruction check fail. The CPU does not support POPCNT instruction set. 
-: MADV_DONTNEED does not work (memset will be used instead) -: (This is the expected behaviour if you are running under QEMU) 1 diff --git a/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.sh b/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.sh index 01047aeb9ab..c37f1f95374 100755 --- a/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.sh +++ b/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-fasttest, no-cpu-aarch64 -# Tag no-fasttest: avoid dependency on qemu -- invonvenient when running locally +# Tag no-fasttest: avoid dependency on qemu -- inconvenient when running locally CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01191_rename_dictionary.sql b/tests/queries/0_stateless/01191_rename_dictionary.sql index 6666c3308ca..c5012dabc81 100644 --- a/tests/queries/0_stateless/01191_rename_dictionary.sql +++ b/tests/queries/0_stateless/01191_rename_dictionary.sql @@ -17,7 +17,7 @@ SELECT name, status FROM system.dictionaries WHERE database='test_01191'; SELECT name, engine FROM system.tables WHERE database='test_01191' ORDER BY name; RENAME DICTIONARY test_01191.table TO test_01191.table1; -- {serverError UNKNOWN_TABLE} -EXCHANGE DICTIONARIES test_01191._ AND test_01191.dict; -- {serverError INCORRECT_QUERY} +EXCHANGE DICTIONARIES test_01191._ AND test_01191.dict; -- {serverError INFINITE_LOOP} EXCHANGE TABLES test_01191.t AND test_01191.dict; SELECT name, status FROM system.dictionaries WHERE database='test_01191'; SELECT name, engine FROM system.tables WHERE database='test_01191' ORDER BY name; diff --git a/tests/queries/0_stateless/01254_dict_load_after_detach_attach.reference b/tests/queries/0_stateless/01254_dict_load_after_detach_attach.reference index 2f2d638a294..9c2c59f6379 100644 --- a/tests/queries/0_stateless/01254_dict_load_after_detach_attach.reference +++ b/tests/queries/0_stateless/01254_dict_load_after_detach_attach.reference @@ -1,4 +1,4 @@ -0 NOT_LOADED +NOT_LOADED 0 LOADED 10 1 LOADED diff --git a/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql b/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql index c11cb64735c..206ddeac612 100644 --- a/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql +++ b/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql @@ -17,7 +17,7 @@ LAYOUT(FLAT()); DETACH DATABASE dict_db_01254; ATTACH DATABASE dict_db_01254; -SELECT query_count, status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict'; +SELECT COALESCE((SELECT status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict')::Nullable(String), 'NOT_LOADED'); SYSTEM RELOAD DICTIONARY dict_db_01254.dict; SELECT query_count, status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict'; SELECT dictGetUInt64('dict_db_01254.dict', 'val', toUInt64(0)); diff --git a/tests/queries/0_stateless/01393_benchmark_secure_port.sh b/tests/queries/0_stateless/01393_benchmark_secure_port.sh index 7954e439977..f75577e6ddf 100755 --- a/tests/queries/0_stateless/01393_benchmark_secure_port.sh +++ b/tests/queries/0_stateless/01393_benchmark_secure_port.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-tsan, no-asan +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh 
diff --git a/tests/queries/0_stateless/01502_jemalloc_percpu_arena.reference b/tests/queries/0_stateless/01502_jemalloc_percpu_arena.reference index fe093e39a56..5accb577786 100644 --- a/tests/queries/0_stateless/01502_jemalloc_percpu_arena.reference +++ b/tests/queries/0_stateless/01502_jemalloc_percpu_arena.reference @@ -1,5 +1,3 @@ -: Number of CPUs detected is not deterministic. Per-CPU arena disabled. 1 -: Number of CPUs detected is not deterministic. Per-CPU arena disabled. 100000000 1 diff --git a/tests/queries/0_stateless/01502_jemalloc_percpu_arena.sh b/tests/queries/0_stateless/01502_jemalloc_percpu_arena.sh index b3ea6eca3f4..c1bd1e0e1fa 100755 --- a/tests/queries/0_stateless/01502_jemalloc_percpu_arena.sh +++ b/tests/queries/0_stateless/01502_jemalloc_percpu_arena.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-msan, no-ubsan, no-fasttest +# Tags: no-tsan, no-asan, no-msan, no-ubsan, no-fasttest, no-debug # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # NOTE: jemalloc is disabled under sanitizers diff --git a/tests/queries/0_stateless/01505_pipeline_executor_UAF.sh b/tests/queries/0_stateless/01505_pipeline_executor_UAF.sh index c2750ad31b2..35c2b796570 100755 --- a/tests/queries/0_stateless/01505_pipeline_executor_UAF.sh +++ b/tests/queries/0_stateless/01505_pipeline_executor_UAF.sh @@ -14,7 +14,7 @@ for _ in {1..10}; do ${CLICKHOUSE_LOCAL} -q 'select * from numbers_mt(100000000) settings max_threads=100 FORMAT Null' # Binding to specific CPU is not required, but this makes the test more reliable. taskset --cpu-list 0 ${CLICKHOUSE_LOCAL} -q 'select * from numbers_mt(100000000) settings max_threads=100 FORMAT Null' 2>&1 | { - # build with santiziers does not have jemalloc + # build with sanitizers does not have jemalloc # and for jemalloc we have separate test # 01502_jemalloc_percpu_arena grep -v ': Number of CPUs detected is not deterministic. Per-CPU arena disabled.' diff --git a/tests/queries/0_stateless/01526_client_start_and_exit.expect-not-a-test-case b/tests/queries/0_stateless/01526_client_start_and_exit.expect-not-a-test-case deleted file mode 100755 index 00fb5c4e85b..00000000000 --- a/tests/queries/0_stateless/01526_client_start_and_exit.expect-not-a-test-case +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/expect -f - -log_user 1 -set timeout 5 -match_max 100000 - -spawn bash -c "$env(CLICKHOUSE_CLIENT_BINARY) --no-warnings $env(CLICKHOUSE_CLIENT_OPT)" -expect ":) " -send -- "\4" -expect eof diff --git a/tests/queries/0_stateless/01526_client_start_and_exit.reference b/tests/queries/0_stateless/01526_client_start_and_exit.reference deleted file mode 100644 index e3e2e7b22af..00000000000 --- a/tests/queries/0_stateless/01526_client_start_and_exit.reference +++ /dev/null @@ -1 +0,0 @@ -Loaded 10000 queries. diff --git a/tests/queries/0_stateless/01526_client_start_and_exit.sh b/tests/queries/0_stateless/01526_client_start_and_exit.sh deleted file mode 100755 index 0c5c94e3eac..00000000000 --- a/tests/queries/0_stateless/01526_client_start_and_exit.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-fasttest - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -# Create a huge amount of tables, so Suggest will take a time to load -${CLICKHOUSE_CLIENT} -q "SELECT 'CREATE TABLE test_' || hex(randomPrintableASCII(40)) || '(x UInt8) Engine=Memory;' FROM numbers(10000)" --format=TSVRaw | ${CLICKHOUSE_BENCHMARK} -c32 -i 10000 -d 0 2>&1 | grep -F 'Loaded 10000 queries' - -function stress() -{ - # 2004l is ignored because parallel running expect emulated terminal doesn't - # work well with bracketed paste enabling sequence, which is \e033?2004l - # (https://cirw.in/blog/bracketed-paste) - while true; do - "${CURDIR}"/01526_client_start_and_exit.expect-not-a-test-case | grep -v -P 'ClickHouse client|Connecting|Connected|:\) Bye\.|new year|^\s*$|spawn bash|\?2004l|^0\s*$' - done -} - -export CURDIR -export -f stress - -for _ in {1..10}; do - timeout 3 bash -c stress & -done - -wait diff --git a/tests/queries/0_stateless/01552_dict_fixedstring.sql b/tests/queries/0_stateless/01552_dict_fixedstring.sql index 01d55656e3c..0b19c9980a4 100644 --- a/tests/queries/0_stateless/01552_dict_fixedstring.sql +++ b/tests/queries/0_stateless/01552_dict_fixedstring.sql @@ -16,5 +16,5 @@ LIFETIME(MIN 10 MAX 10); SELECT dictGet(currentDatabase() || '.dict', 's', number) FROM numbers(2); -DROP TABLE src; DROP DICTIONARY dict; +DROP TABLE src; diff --git a/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.reference b/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.reference index d5bdb816bf2..e69de29bb2d 100644 --- a/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.reference +++ b/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.reference @@ -1 +0,0 @@ -Unknown data type family: CODEC diff --git a/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.sh b/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.sh deleted file mode 100755 index 8a3242c7036..00000000000 --- a/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -${CLICKHOUSE_CLIENT} --query "CREATE TABLE t (c CODEC(NONE)) ENGINE = Memory" 2>&1 | grep -oF 'Unknown data type family: CODEC' | uniq diff --git a/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.sql b/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.sql new file mode 100644 index 00000000000..ab1cfc89be1 --- /dev/null +++ b/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.sql @@ -0,0 +1 @@ +CREATE TABLE t (c CODEC(NONE)) ENGINE = Memory -- { clientError SYNTAX_ERROR } \ No newline at end of file diff --git a/tests/queries/0_stateless/01602_array_aggregation.reference b/tests/queries/0_stateless/01602_array_aggregation.reference index ec8a0838401..bce8ac88c97 100644 --- a/tests/queries/0_stateless/01602_array_aggregation.reference +++ b/tests/queries/0_stateless/01602_array_aggregation.reference @@ -2,6 +2,10 @@ Array min 1 Array max 6 Array sum 21 Array avg 3.5 +Array min : +[1] +Array max : +[3] Table array int min 1 0 diff --git a/tests/queries/0_stateless/01602_array_aggregation.sql b/tests/queries/0_stateless/01602_array_aggregation.sql index 7c0f6eb8267..d8be9eb82f2 100644 --- a/tests/queries/0_stateless/01602_array_aggregation.sql +++ b/tests/queries/0_stateless/01602_array_aggregation.sql @@ -3,6 +3,12 @@ SELECT 'Array max ', (arrayMax(array(1,2,3,4,5,6))); SELECT 'Array sum ', (arraySum(array(1,2,3,4,5,6))); SELECT 'Array avg ', (arrayAvg(array(1,2,3,4,5,6))); +SELECT 'Array min :'; +SELECT arrayMin([[3], [1], [2]]); + +SELECT 'Array max :'; +SELECT arrayMax([[3], [1], [2]]); + DROP TABLE IF EXISTS test_aggregation; CREATE TABLE test_aggregation (x Array(Int)) ENGINE=TinyLog; diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index edf93b4b39f..a6af1f2170d 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -163,7 +163,6 @@ Filter column: notEquals(__table1.y, 2_UInt8) > filter is pushed down before CreatingSets CreatingSets Filter -Filter 1 3 > one condition of filter is pushed down before LEFT JOIN diff --git a/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.reference b/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.reference new file mode 100644 index 00000000000..4905c7f8a71 --- /dev/null +++ b/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.reference @@ -0,0 +1,10 @@ + Filter (((WHERE + (Change column names to column identifiers + (Project names + Projection))) + HAVING)) + Filter column: and(notEquals(sum(__table2.number), 0_UInt8), equals(__table1.key, 7_UInt8)) (removed) + Aggregating + Filter (( + (Before GROUP BY + Change column names to column identifiers))) + Filter column: equals(__table1.key, 7_UInt8) (removed) + Filter (((WHERE + (Projection + Before ORDER BY)) + HAVING)) + Filter column: and(notEquals(sum(number), 0), equals(key, 7)) (removed) + Aggregating + Filter ((( + Before GROUP BY) + WHERE)) + Filter column: and(equals(bitAnd(number, 15), 7), equals(key, 7)) (removed) diff --git a/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql b/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql new file mode 100644 index 00000000000..1301135b4cb --- /dev/null +++ b/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql @@ -0,0 +1,5 @@ +set allow_experimental_analyzer=1; +select explain from (explain actions = 1 select * from 
(select sum(number) as v, bitAnd(number, 15) as key from numbers(1e8) group by key having v != 0) where key = 7) where explain like '%Filter%' or explain like '%Aggregating%'; + +set allow_experimental_analyzer=0; +select explain from (explain actions = 1 select * from (select sum(number) as v, bitAnd(number, 15) as key from numbers(1e8) group by key having v != 0) where key = 7) where explain like '%Filter%' or explain like '%Aggregating%'; diff --git a/tests/queries/0_stateless/01676_dictget_in_default_expression.sql b/tests/queries/0_stateless/01676_dictget_in_default_expression.sql index 54e46a2b718..db23ae1919c 100644 --- a/tests/queries/0_stateless/01676_dictget_in_default_expression.sql +++ b/tests/queries/0_stateless/01676_dictget_in_default_expression.sql @@ -22,7 +22,8 @@ DETACH DATABASE test_01676; ATTACH DATABASE test_01676; SELECT 'status_after_detach_and_attach:'; -SELECT status FROM system.dictionaries WHERE database='test_01676' AND name='dict'; +-- It can be not loaded, or not even finish attaching in case of asynchronous tables loading. +SELECT COALESCE((SELECT status FROM system.dictionaries WHERE database='test_01676' AND name='dict')::Nullable(String), 'NOT_LOADED'); INSERT INTO test_01676.table (x) VALUES (toInt64(4)); SELECT * FROM test_01676.table ORDER BY x; diff --git a/tests/queries/0_stateless/01683_text_log_deadlock.reference b/tests/queries/0_stateless/01683_text_log_deadlock.reference index 4cf61460252..3805f2a95e9 100644 --- a/tests/queries/0_stateless/01683_text_log_deadlock.reference +++ b/tests/queries/0_stateless/01683_text_log_deadlock.reference @@ -1 +1 @@ -queries: 25000 +queries: 5000 diff --git a/tests/queries/0_stateless/01683_text_log_deadlock.sh b/tests/queries/0_stateless/01683_text_log_deadlock.sh index 1aced61cb42..6b3bcc58868 100755 --- a/tests/queries/0_stateless/01683_text_log_deadlock.sh +++ b/tests/queries/0_stateless/01683_text_log_deadlock.sh @@ -1,8 +1,8 @@ #!/usr/bin/env bash -# Tags: deadlock, no-tsan, no-asan +# Tags: deadlock CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_BENCHMARK --secure -i 25000 -c 32 --query 'SELECT 1' 2>&1 | grep -oF 'queries: 25000' +$CLICKHOUSE_BENCHMARK --secure -i 5000 -c 32 --query 'SELECT 1' 2>&1 | grep -oF 'queries: 5000' diff --git a/tests/queries/0_stateless/01760_ddl_dictionary_use_current_database_name.sql b/tests/queries/0_stateless/01760_ddl_dictionary_use_current_database_name.sql index 55c0d1e3678..a7f04921f1f 100644 --- a/tests/queries/0_stateless/01760_ddl_dictionary_use_current_database_name.sql +++ b/tests/queries/0_stateless/01760_ddl_dictionary_use_current_database_name.sql @@ -27,5 +27,5 @@ SELECT dictGet('ddl_dictionary_test', 'value', number) FROM system.numbers LIMIT SELECT 'dictHas'; SELECT dictHas('ddl_dictionary_test', number) FROM system.numbers LIMIT 3; -DROP TABLE ddl_dictonary_test_source; DROP DICTIONARY ddl_dictionary_test; +DROP TABLE ddl_dictonary_test_source; diff --git a/tests/queries/0_stateless/01760_system_dictionaries.sql b/tests/queries/0_stateless/01760_system_dictionaries.sql index a5609281e49..2e7d4184811 100644 --- a/tests/queries/0_stateless/01760_system_dictionaries.sql +++ b/tests/queries/0_stateless/01760_system_dictionaries.sql @@ -25,8 +25,8 @@ SELECT * FROM 01760_db.example_simple_key_dictionary; SELECT name, database, key.names, key.types, attribute.names, attribute.types, status FROM system.dictionaries WHERE database='01760_db'; -DROP TABLE 01760_db.example_simple_key_source; DROP DICTIONARY 01760_db.example_simple_key_dictionary; +DROP TABLE 01760_db.example_simple_key_source; SELECT name, database, key.names, key.types, attribute.names, attribute.types, status FROM system.dictionaries WHERE database='01760_db'; @@ -53,7 +53,7 @@ SELECT * FROM 01760_db.example_complex_key_dictionary; SELECT name, database, key.names, key.types, attribute.names, attribute.types, status FROM system.dictionaries WHERE database='01760_db'; -DROP TABLE 01760_db.example_complex_key_source; DROP DICTIONARY 01760_db.example_complex_key_dictionary; +DROP TABLE 01760_db.example_complex_key_source; DROP DATABASE 01760_db; diff --git a/tests/queries/0_stateless/01763_max_distributed_depth.sql b/tests/queries/0_stateless/01763_max_distributed_depth.sql index 08dc533876d..f722a88226d 100644 --- a/tests/queries/0_stateless/01763_max_distributed_depth.sql +++ b/tests/queries/0_stateless/01763_max_distributed_depth.sql @@ -17,19 +17,6 @@ ENGINE = Distributed('test_shard_localhost', '', 'tt7', rand()); DROP TABLE IF EXISTS tt7; -CREATE TABLE tt7 as tt6 ENGINE = Distributed('test_shard_localhost', '', 'tt6', rand()); - -INSERT INTO tt6 VALUES (1, 1, 1, 1, 'ok'); -- { serverError TOO_LARGE_DISTRIBUTED_DEPTH } - -SELECT * FROM tt6; -- { serverError TOO_LARGE_DISTRIBUTED_DEPTH } - -SET max_distributed_depth = 0; - --- stack overflow -INSERT INTO tt6 VALUES (1, 1, 1, 1, 'ok'); -- { serverError TOO_DEEP_RECURSION} - --- stack overflow -SELECT * FROM tt6; -- { serverError TOO_DEEP_RECURSION } +CREATE TABLE tt7 as tt6 ENGINE = Distributed('test_shard_localhost', '', 'tt6', rand()); -- {serverError INFINITE_LOOP} DROP TABLE tt6; -DROP TABLE tt7; diff --git a/tests/queries/0_stateless/01764_table_function_dictionary.sql b/tests/queries/0_stateless/01764_table_function_dictionary.sql index b642fdd741e..76e7213b367 100644 --- a/tests/queries/0_stateless/01764_table_function_dictionary.sql +++ b/tests/queries/0_stateless/01764_table_function_dictionary.sql @@ -23,5 +23,5 @@ LAYOUT(DIRECT()); SELECT * FROM dictionary('table_function_dictionary_test_dictionary'); -DROP TABLE 
table_function_dictionary_source_table; DROP DICTIONARY table_function_dictionary_test_dictionary; +DROP TABLE table_function_dictionary_source_table; diff --git a/tests/queries/0_stateless/01804_dictionary_decimal256_type.sql b/tests/queries/0_stateless/01804_dictionary_decimal256_type.sql index 77e9abfb742..08a8d0feb27 100644 --- a/tests/queries/0_stateless/01804_dictionary_decimal256_type.sql +++ b/tests/queries/0_stateless/01804_dictionary_decimal256_type.sql @@ -25,6 +25,8 @@ LAYOUT(FLAT()); SELECT 'Flat dictionary'; SELECT dictGet('flat_dictionary', 'decimal_value', toUInt64(1)); +DROP DICTIONARY flat_dictionary; + DROP DICTIONARY IF EXISTS hashed_dictionary; CREATE DICTIONARY hashed_dictionary ( diff --git a/tests/queries/0_stateless/01852_dictionary_found_rate_long.sql b/tests/queries/0_stateless/01852_dictionary_found_rate_long.sql index d5108e98510..da364403893 100644 --- a/tests/queries/0_stateless/01852_dictionary_found_rate_long.sql +++ b/tests/queries/0_stateless/01852_dictionary_found_rate_long.sql @@ -310,6 +310,6 @@ SELECT name, found_rate FROM system.dictionaries WHERE database = currentDatabas SELECT tuple(x, y) as key, dictGet('polygon_dictionary_01862', 'name', key) FROM points_01862 FORMAT Null; SELECT name, found_rate FROM system.dictionaries WHERE database = currentDatabase() AND name = 'polygon_dictionary_01862'; +DROP DICTIONARY polygon_dictionary_01862; DROP TABLE polygons_01862; DROP TABLE points_01862; -DROP DICTIONARY polygon_dictionary_01862; diff --git a/tests/queries/0_stateless/01904_dictionary_default_nullable_type.sql b/tests/queries/0_stateless/01904_dictionary_default_nullable_type.sql index 4c623941a19..d28f9e5c4e6 100644 --- a/tests/queries/0_stateless/01904_dictionary_default_nullable_type.sql +++ b/tests/queries/0_stateless/01904_dictionary_default_nullable_type.sql @@ -111,6 +111,8 @@ LAYOUT(IP_TRIE()); SELECT 'IPTrie dictionary'; SELECT dictGet('ip_trie_dictionary', 'value', tuple(IPv4StringToNum('127.0.0.0'))); --{serverError UNSUPPORTED_METHOD} +DROP DICTIONARY ip_trie_dictionary; + DROP TABLE dictionary_nullable_source_table; DROP TABLE dictionary_nullable_default_source_table; diff --git a/tests/queries/0_stateless/01910_view_dictionary.sql b/tests/queries/0_stateless/01910_view_dictionary.sql index 1f9928735b4..05a67889825 100644 --- a/tests/queries/0_stateless/01910_view_dictionary.sql +++ b/tests/queries/0_stateless/01910_view_dictionary.sql @@ -45,5 +45,5 @@ FROM numbers(3); DROP TABLE dictionary_source_en; DROP TABLE dictionary_source_ru; -DROP TABLE dictionary_source_view; DROP DICTIONARY flat_dictionary; +DROP TABLE dictionary_source_view; diff --git a/tests/queries/0_stateless/01942_dateTimeToSnowflake.sql b/tests/queries/0_stateless/01942_dateTimeToSnowflake.sql index 1090179bb67..6cce4863c15 100644 --- a/tests/queries/0_stateless/01942_dateTimeToSnowflake.sql +++ b/tests/queries/0_stateless/01942_dateTimeToSnowflake.sql @@ -1,3 +1,4 @@ +SET allow_deprecated_snowflake_conversion_functions = 1; -- Force-enable deprecated snowflake conversion functions (in case this is randomized in CI) SET session_timezone = 'Africa/Juba'; -- Error cases @@ -10,6 +11,9 @@ SELECT dateTime64ToSnowflake('abc'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} SELECT dateTimeToSnowflake('abc', 123); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} SELECT dateTime64ToSnowflake('abc', 123); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT dateTimeToSnowflake(now()) SETTINGS allow_deprecated_snowflake_conversion_functions = 0; -- { serverError 
DEPRECATED_FUNCTION } +SELECT dateTime64ToSnowflake(now64()) SETTINGS allow_deprecated_snowflake_conversion_functions = 0; -- { serverError DEPRECATED_FUNCTION } + SELECT '-- const / non-const inputs'; WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt diff --git a/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.reference b/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.reference new file mode 100644 index 00000000000..5dcd0c9dfcd --- /dev/null +++ b/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.reference @@ -0,0 +1,32 @@ +-- Negative tests +-- Return type +UInt64 +UInt64 +-- Standard and twitter epoch +Row 1: +────── +dt: 2021-08-15 18:57:56 +dt64: 2021-08-15 18:57:56.492 +dateTimeToSnowflakeID(dt): 6832747188322304000 +dateTime64ToSnowflakeID(dt64): 6832747190385901568 +dateTimeToSnowflakeID(dt, twitter_epoch): 1426981498778550272 +dateTime64ToSnowflakeID(dt64, twitter_epoch): 1426981500842147840 +-- Different DateTime64 scales +Row 1: +────── +dateTime64ToSnowflakeID(dt64_0): 6832747188322304000 +dateTime64ToSnowflakeID(dt64_1): 6832747190000025600 +dateTime64ToSnowflakeID(dt64_2): 6832747190377512960 +dateTime64ToSnowflakeID(dt64_3): 6832747190385901568 +dateTime64ToSnowflakeID(dt64_4): 6832747190385901568 +-- Idempotency +Row 1: +────── +equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_0), 0, 'UTC'), dt64_0): 1 +equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_1), 0, 'UTC'), dt64_1): 1 +equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_2), 0, 'UTC'), dt64_2): 1 +equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_3), 0, 'UTC'), dt64_3): 1 +Row 1: +────── +dt64_4: 2023-11-11 11:11:11.1231 +snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_4)): 2023-11-11 11:11:11.123 diff --git a/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.sql b/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.sql new file mode 100644 index 00000000000..945b399157f --- /dev/null +++ b/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.sql @@ -0,0 +1,74 @@ +SET session_timezone = 'UTC'; -- disable timezone randomization +SET allow_experimental_analyzer = 1; -- The old path formats the result with different whitespaces + +SELECT '-- Negative tests'; +SELECT dateTimeToSnowflakeID(); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT dateTime64ToSnowflakeID(); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT dateTimeToSnowflakeID('invalid_dt'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT dateTime64ToSnowflakeID('invalid_dt'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT dateTimeToSnowflakeID(now(), 'invalid_epoch'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT dateTime64ToSnowflakeID(now64(), 'invalid_epoch'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT dateTimeToSnowflakeID(now(), 42, 'too_many_args'); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT dateTime64ToSnowflakeID(now64(), 42, 'too_many_args'); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} + +SELECT '-- Return type'; +SELECT toTypeName(dateTimeToSnowflakeID(now())); +SELECT toTypeName(dateTime64ToSnowflakeID(now64())); + +SELECT '-- Standard and twitter epoch'; + +WITH + toDateTime('2021-08-15 18:57:56') AS dt, + toDateTime64('2021-08-15 18:57:56.492', 3) AS dt64, + 1288834974657 AS twitter_epoch +SELECT + dt, + dt64, + dateTimeToSnowflakeID(dt), + dateTime64ToSnowflakeID(dt64), + dateTimeToSnowflakeID(dt, twitter_epoch), + dateTime64ToSnowflakeID(dt64, twitter_epoch) +FORMAT + Vertical; + +SELECT '-- Different 
DateTime64 scales'; + +WITH + toDateTime64('2021-08-15 18:57:56.492', 0, 'UTC') AS dt64_0, + toDateTime64('2021-08-15 18:57:56.492', 1, 'UTC') AS dt64_1, + toDateTime64('2021-08-15 18:57:56.492', 2, 'UTC') AS dt64_2, + toDateTime64('2021-08-15 18:57:56.492', 3, 'UTC') AS dt64_3, + toDateTime64('2021-08-15 18:57:56.492', 4, 'UTC') AS dt64_4 +SELECT + dateTime64ToSnowflakeID(dt64_0), + dateTime64ToSnowflakeID(dt64_1), + dateTime64ToSnowflakeID(dt64_2), + dateTime64ToSnowflakeID(dt64_3), + dateTime64ToSnowflakeID(dt64_4) +Format + Vertical; + +SELECT '-- Idempotency'; + + -- DateTime64-to-SnowflakeID-to-DateTime64 is idempotent if the scale is <=3 (millisecond precision) +WITH + now64(0) AS dt64_0, + now64(1) AS dt64_1, + now64(2) AS dt64_2, + now64(3) AS dt64_3 +SELECT + snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_0), 0, 'UTC') == dt64_0, + snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_1), 0, 'UTC') == dt64_1, + snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_2), 0, 'UTC') == dt64_2, + snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_3), 0, 'UTC') == dt64_3 +FORMAT + Vertical; + +-- not idempotent +WITH + toDateTime64('2023-11-11 11:11:11.1231', 4, 'UTC') AS dt64_4 +SELECT + dt64_4, + snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_4)) +FORMAT + Vertical; diff --git a/tests/queries/0_stateless/01942_snowflakeIDToDateTime.reference b/tests/queries/0_stateless/01942_snowflakeIDToDateTime.reference new file mode 100644 index 00000000000..9ed8c1dd3e5 --- /dev/null +++ b/tests/queries/0_stateless/01942_snowflakeIDToDateTime.reference @@ -0,0 +1,27 @@ +-- Negative tests +-- Return type +DateTime +DateTime64(3) +-- Non-const path +Row 1: +────── +sf: 7204436857747984384 +dt: 2024-06-06 10:59:58 +dt64: 2024-06-06 10:59:58.851 +Row 1: +────── +sf: 1426981498778550272 +dt: 2021-08-15 18:57:56 +dt64: 2021-08-15 18:57:56.000 +Row 1: +────── +sf: 7204436857747984384 +dt: 2024-06-06 18:59:58 +dt64: 2024-06-06 18:59:58.851 +-- Const path +Row 1: +────── +sf: 7204436857747984384 +dt: 2024-06-06 10:59:58 +dt64: 2024-06-06 10:59:58.851 +-- Can be combined with generateSnowflakeID diff --git a/tests/queries/0_stateless/01942_snowflakeIDToDateTime.sql b/tests/queries/0_stateless/01942_snowflakeIDToDateTime.sql new file mode 100644 index 00000000000..48316691c71 --- /dev/null +++ b/tests/queries/0_stateless/01942_snowflakeIDToDateTime.sql @@ -0,0 +1,82 @@ +SET session_timezone = 'UTC'; -- disable timezone randomization +SET allow_experimental_analyzer = 1; -- The old path formats the result with different whitespaces + +SELECT '-- Negative tests'; +SELECT snowflakeIDToDateTime(); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT snowflakeIDToDateTime64(); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT snowflakeIDToDateTime('invalid_snowflake'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT snowflakeIDToDateTime64('invalid_snowflake'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT snowflakeIDToDateTime(123::UInt64, 'invalid_epoch'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT snowflakeIDToDateTime64(123::UInt64, 'invalid_epoch'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT snowflakeIDToDateTime(123::UInt64, materialize(42)); -- {serverError ILLEGAL_COLUMN} +SELECT snowflakeIDToDateTime64(123::UInt64, materialize(42)); -- {serverError ILLEGAL_COLUMN} +SELECT snowflakeIDToDateTime(123::UInt64, 42, 42); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT snowflakeIDToDateTime64(123::UInt64, 42, 42); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT 
snowflakeIDToDateTime(123::UInt64, 42, 'UTC', 'too_many_args'); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT snowflakeIDToDateTime64(123::UInt64, 42, 'UTC', 'too_many_args'); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} + +SELECT '-- Return type'; +SELECT toTypeName(snowflakeIDToDateTime(123::UInt64)); +SELECT toTypeName(snowflakeIDToDateTime64(123::UInt64)); + +SELECT '-- Non-const path'; +-- Two const arguments are mapped to two non-const arguments ('getDefaultImplementationForConstants'), the non-const path is taken + +WITH + 7204436857747984384 AS sf +SELECT + sf, + snowflakeIDToDateTime(sf) as dt, + snowflakeIDToDateTime64(sf) as dt64 +FORMAT + Vertical; + +-- With Twitter Snowflake ID and Twitter epoch +WITH + 1426981498778550272 AS sf, + 1288834974657 AS epoch +SELECT + sf, + snowflakeIDToDateTime(sf, epoch) as dt, + snowflakeIDToDateTime64(sf, epoch) as dt64 +FORMAT + Vertical; + +-- non-default timezone +WITH + 7204436857747984384 AS sf, + 0 AS epoch, -- default epoch + 'Asia/Shanghai' AS tz +SELECT + sf, + snowflakeIDToDateTime(sf, epoch, tz) as dt, + snowflakeIDToDateTime64(sf, epoch, tz) as dt64 +FORMAT + Vertical; + +SELECT '-- Const path'; + +-- The const path can only be tested by const snowflake + const epoch + non-const time-zone. The latter requires a special setting. +WITH + 7204436857747984384 AS sf, + 0 AS epoch, -- default epoch + materialize('Asia/Shanghai') AS tz +SELECT + sf, + snowflakeIDToDateTime(sf, epoch, tz) as dt, + snowflakeIDToDateTime64(sf, epoch, tz) as dt64 +FORMAT + Vertical +SETTINGS + allow_nonconst_timezone_arguments = 1; + + +SELECT '-- Can be combined with generateSnowflakeID'; + +WITH + generateSnowflakeID() AS snowflake +SELECT + snowflakeIDToDateTime(snowflake), + snowflakeIDToDateTime64(snowflake) +FORMAT + Null; diff --git a/tests/queries/0_stateless/01942_snowflakeToDateTime.sql b/tests/queries/0_stateless/01942_snowflakeToDateTime.sql index f1a50dd370d..34fe15ec187 100644 --- a/tests/queries/0_stateless/01942_snowflakeToDateTime.sql +++ b/tests/queries/0_stateless/01942_snowflakeToDateTime.sql @@ -1,4 +1,6 @@ --- -- Error cases +SET allow_deprecated_snowflake_conversion_functions = 1; -- Force-enable deprecated snowflake conversion functions (in case this is randomized in CI) + +-- Error cases SELECT snowflakeToDateTime(); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} SELECT snowflakeToDateTime64(); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} @@ -8,32 +10,35 @@ SELECT snowflakeToDateTime64('abc'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} SELECT snowflakeToDateTime('abc', 123); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} SELECT snowflakeToDateTime64('abc', 123); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT snowflakeToDateTime(123::Int64) SETTINGS allow_deprecated_snowflake_conversion_functions = 0; -- { serverError DEPRECATED_FUNCTION } +SELECT snowflakeToDateTime64(123::Int64) SETTINGS allow_deprecated_snowflake_conversion_functions = 0; -- { serverError DEPRECATED_FUNCTION } + SELECT 'const column'; WITH - CAST(1426860704886947840 AS Int64) AS i64, - 'UTC' AS tz + CAST(1426860704886947840 AS Int64) AS i64, + 'UTC' AS tz SELECT - tz, - i64, - snowflakeToDateTime(i64, tz) as dt, - toTypeName(dt), - snowflakeToDateTime64(i64, tz) as dt64, - toTypeName(dt64); + tz, + i64, + snowflakeToDateTime(i64, tz) as dt, + toTypeName(dt), + snowflakeToDateTime64(i64, tz) as dt64, + toTypeName(dt64); WITH - CAST(1426860704886947840 AS Int64) AS i64, - 'Asia/Shanghai' AS tz + CAST(1426860704886947840 AS Int64) AS i64, + 
'Asia/Shanghai' AS tz SELECT - tz, - i64, - snowflakeToDateTime(i64, tz) as dt, - toTypeName(dt), - snowflakeToDateTime64(i64, tz) as dt64, - toTypeName(dt64); + tz, + i64, + snowflakeToDateTime(i64, tz) as dt, + toTypeName(dt), + snowflakeToDateTime64(i64, tz) as dt64, + toTypeName(dt64); DROP TABLE IF EXISTS tab; -CREATE TABLE tab(val Int64, tz String) engine=Log; +CREATE TABLE tab(val Int64, tz String) engine = Log; INSERT INTO tab VALUES (42, 'Asia/Singapore'); SELECT 1 FROM tab WHERE snowflakeToDateTime(42::Int64, tz) != now() SETTINGS allow_nonconst_timezone_arguments = 1; diff --git a/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.sql b/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.sql index 72cac481376..ea2dad5c732 100644 --- a/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.sql +++ b/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.sql @@ -53,8 +53,8 @@ SELECT CountryID, StartDate, Tax FROM range_dictionary ORDER BY CountryID, Start SELECT 'onlySpecificColumn'; SELECT Tax FROM range_dictionary ORDER BY CountryID, StartDate, EndDate; -DROP TABLE date_table; DROP DICTIONARY range_dictionary; +DROP TABLE date_table; CREATE TABLE date_table ( @@ -107,5 +107,5 @@ SELECT CountryID, StartDate, Tax FROM range_dictionary_nullable ORDER BY Country SELECT 'onlySpecificColumn'; SELECT Tax FROM range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate; -DROP TABLE date_table; DROP DICTIONARY range_dictionary_nullable; +DROP TABLE date_table; diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index d15c1545135..8f62eda9233 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -1136,7 +1136,7 @@ CREATE TABLE system.users `name` String, `id` UUID, `storage` String, - `auth_type` Enum8('no_password' = 0, 'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3, 'ldap' = 4, 'kerberos' = 5, 'ssl_certificate' = 6, 'bcrypt_password' = 7, 'ssh_key' = 8, 'http' = 9), + `auth_type` Enum8('no_password' = 0, 'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3, 'ldap' = 4, 'kerberos' = 5, 'ssl_certificate' = 6, 'bcrypt_password' = 7, 'ssh_key' = 8, 'http' = 9, 'jwt' = 10), `auth_params` String, `host_ip` Array(String), `host_names` Array(String), diff --git a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.reference b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.reference deleted file mode 100644 index 1fc09c8d154..00000000000 --- a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.reference +++ /dev/null @@ -1,16 +0,0 @@ -Checking input_format_parallel_parsing=false& -1 -Checking input_format_parallel_parsing=false&cancel_http_readonly_queries_on_client_close=1&readonly=1 -1 -Checking input_format_parallel_parsing=false&send_progress_in_http_headers=true -1 -Checking input_format_parallel_parsing=false&cancel_http_readonly_queries_on_client_close=1&readonly=1&send_progress_in_http_headers=true -1 -Checking input_format_parallel_parsing=true& -1 -Checking input_format_parallel_parsing=true&cancel_http_readonly_queries_on_client_close=1&readonly=1 -1 -Checking input_format_parallel_parsing=true&send_progress_in_http_headers=true -1 -Checking 
input_format_parallel_parsing=true&cancel_http_readonly_queries_on_client_close=1&readonly=1&send_progress_in_http_headers=true -1 diff --git a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh deleted file mode 100755 index 5494f7d59cb..00000000000 --- a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-tsan, no-cpu-aarch64, no-parallel, no-debug -# TSan does not supports tracing. -# trace_log doesn't work on aarch64 - -# Regression for proper release of Context, -# via tracking memory of external tables. - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -tmp_file=$(mktemp "$CURDIR/clickhouse.XXXXXX.csv") -trap 'rm $tmp_file' EXIT - -$CLICKHOUSE_CLIENT -q "SELECT toString(number) FROM numbers(1e6) FORMAT TSV" > "$tmp_file" - -function run_and_check() -{ - local query_id - query_id="$(${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- <<<'SELECT generateUUIDv4()')" - - echo "Checking $*" - - # Run query with external table (implicit StorageMemory user) - $CLICKHOUSE_CURL -sS -F "s=@$tmp_file;" "$CLICKHOUSE_URL&s_structure=key+Int&query=SELECT+count()+FROM+s&memory_profiler_sample_probability=1&max_untracked_memory=0&query_id=$query_id&$*" -o /dev/null - - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- <<<'SYSTEM FLUSH LOGS' - - # Check that temporary table had been destroyed. - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&allow_introspection_functions=1" --data-binary @- <<<" - WITH arrayStringConcat(arrayMap(x -> demangle(addressToSymbol(x)), trace), '\n') AS sym - SELECT 1 FROM system.trace_log - PREWHERE - query_id = '$query_id' AND - trace_type = 'MemorySample' AND - /* only deallocations */ - size < 0 AND - event_date >= yesterday() - WHERE - sym LIKE '%DB::StorageMemory::drop%\n%TemporaryTableHolder::~TemporaryTableHolder%' - LIMIT 1 - " -} - -for input_format_parallel_parsing in false true; do - query_args_variants=( - "" - "cancel_http_readonly_queries_on_client_close=1&readonly=1" - "send_progress_in_http_headers=true" - # nested progress callback - "cancel_http_readonly_queries_on_client_close=1&readonly=1&send_progress_in_http_headers=true" - ) - for query_args in "${query_args_variants[@]}"; do - run_and_check "input_format_parallel_parsing=$input_format_parallel_parsing&$query_args" - done -done diff --git a/tests/queries/0_stateless/02154_parser_backtracking.sh b/tests/queries/0_stateless/02154_parser_backtracking.sh index fd227bcfc56..72121d14dfa 100755 --- a/tests/queries/0_stateless/02154_parser_backtracking.sh +++ b/tests/queries/0_stateless/02154_parser_backtracking.sh @@ -1,4 +1,6 @@ #!/usr/bin/env bash +# Tags: no-tsan +# ^ TSan uses more stack CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02155_dictionary_comment.sql b/tests/queries/0_stateless/02155_dictionary_comment.sql index 30b85e16a7c..8ebc7b259fc 100644 --- a/tests/queries/0_stateless/02155_dictionary_comment.sql +++ b/tests/queries/0_stateless/02155_dictionary_comment.sql @@ -49,5 +49,5 @@ SELECT name, comment FROM system.tables WHERE name == '02155_test_dictionary_vie SELECT name, comment FROM system.tables WHERE name == '02155_test_dictionary_view' AND database == currentDatabase(); DROP TABLE 02155_test_dictionary_view; -DROP TABLE 
02155_test_table; DROP DICTIONARY 02155_test_dictionary; +DROP TABLE 02155_test_table; diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere.reference b/tests/queries/0_stateless/02156_storage_merge_prewhere.reference index 8a18c609ede..876cee60baa 100644 --- a/tests/queries/0_stateless/02156_storage_merge_prewhere.reference +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere.reference @@ -4,6 +4,12 @@ Prewhere info Prewhere filter Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed) 2 Filter column: and(equals(k, 3), notEmpty(v)) (removed) Prewhere info diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql index ca61a8f2d57..4f010ebadfd 100644 --- a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql @@ -24,7 +24,8 @@ INSERT INTO t_02156_mt1 SELECT number, toString(number) FROM numbers(10000); INSERT INTO t_02156_mt2 SELECT number, toString(number) FROM numbers(10000); INSERT INTO t_02156_log SELECT number, toString(number) FROM numbers(10000); -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v)) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v)) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%' settings allow_experimental_analyzer=1; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v)) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%' settings allow_experimental_analyzer=0; SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v); SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02156_merge2 WHERE k = 3 AND notEmpty(v)) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere_2.reference b/tests/queries/0_stateless/02156_storage_merge_prewhere_2.reference new file mode 100644 index 00000000000..8e759648871 --- /dev/null +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere_2.reference @@ -0,0 +1,12 @@ +1 a +1 a +2 b +2 b +1 a +1 a +2 b +2 b +1 a +2 b +1 a +2 b diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere_2.sql b/tests/queries/0_stateless/02156_storage_merge_prewhere_2.sql new file mode 100644 index 00000000000..1b4881d4e7b --- /dev/null +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere_2.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS t_02156_ololo_1; +DROP TABLE IF EXISTS t_02156_ololo_2; +DROP TABLE IF EXISTS t_02156_ololo_dist; + +CREATE TABLE t_02156_ololo_1 (k UInt32, v Nullable(String)) ENGINE = MergeTree order by k; +CREATE TABLE t_02156_ololo_2 (k UInt32, v String) ENGINE = MergeTree order by k; +CREATE TABLE t_02156_ololo_dist (k UInt32, v String) ENGINE = Distributed(test_shard_localhost, currentDatabase(), t_02156_ololo_2); +CREATE TABLE t_02156_ololo_dist2 (k UInt32, v Nullable(String)) ENGINE = Distributed(test_shard_localhost, currentDatabase(), 
t_02156_ololo_1); + +insert into t_02156_ololo_1 values (1, 'a'); +insert into t_02156_ololo_2 values (2, 'b'); + +select * from merge('t_02156_ololo') where k != 0 and notEmpty(v) order by k settings optimize_move_to_prewhere=0; +select * from merge('t_02156_ololo') where k != 0 and notEmpty(v) order by k settings optimize_move_to_prewhere=1; + +select * from merge('t_02156_ololo_dist') where k != 0 and notEmpty(v) order by k settings optimize_move_to_prewhere=0; +select * from merge('t_02156_ololo_dist') where k != 0 and notEmpty(v) order by k settings optimize_move_to_prewhere=1; diff --git a/tests/queries/0_stateless/02183_dictionary_date_types.sql b/tests/queries/0_stateless/02183_dictionary_date_types.sql index e06863d5e53..5671f47cdab 100644 --- a/tests/queries/0_stateless/02183_dictionary_date_types.sql +++ b/tests/queries/0_stateless/02183_dictionary_date_types.sql @@ -170,8 +170,8 @@ LIFETIME(0); SELECT 'Polygon dictionary'; SELECT * FROM 02183_polygon_dictionary; -DROP TABLE 02183_polygon_dictionary_source_table; DROP DICTIONARY 02183_polygon_dictionary; +DROP TABLE 02183_polygon_dictionary_source_table; DROP TABLE IF EXISTS 02183_range_dictionary_source_table; CREATE TABLE 02183_range_dictionary_source_table diff --git a/tests/queries/0_stateless/02185_range_hashed_dictionary_open_ranges.sql b/tests/queries/0_stateless/02185_range_hashed_dictionary_open_ranges.sql index e6edee2ea18..a36c72de0ac 100644 --- a/tests/queries/0_stateless/02185_range_hashed_dictionary_open_ranges.sql +++ b/tests/queries/0_stateless/02185_range_hashed_dictionary_open_ranges.sql @@ -60,4 +60,5 @@ SELECT dictHas('02185_range_dictionary', 0, 0); SELECT dictHas('02185_range_dictionary', 0, 5001); SELECT dictHas('02185_range_dictionary', 0, 10001); +DROP DICTIONARY 02185_range_dictionary; DROP TABLE 02185_range_dictionary_source_table; diff --git a/tests/queries/0_stateless/02210_processors_profile_log.reference b/tests/queries/0_stateless/02210_processors_profile_log.reference index 41543d0706a..035bd9897ad 100644 --- a/tests/queries/0_stateless/02210_processors_profile_log.reference +++ b/tests/queries/0_stateless/02210_processors_profile_log.reference @@ -1,38 +1,8 @@ --- { echo } -EXPLAIN PIPELINE SELECT sleep(1); (Expression) ExpressionTransform (ReadFromStorage) SourceFromSingleChunk 0 → 1 -SELECT sleep(1) SETTINGS log_processors_profiles=true, log_queries=1, log_queries_min_type='QUERY_FINISH'; 0 -SYSTEM FLUSH LOGS; -WITH - ( - SELECT query_id - FROM system.query_log - WHERE current_database = currentDatabase() AND Settings['log_processors_profiles']='1' - ) AS query_id_ -SELECT - name, - multiIf( - -- ExpressionTransform executes sleep(), - -- so IProcessor::work() will spend 1 sec. - name = 'ExpressionTransform', elapsed_us>=1e6, - -- SourceFromSingleChunk, that feed data to ExpressionTransform, - -- will feed first block and then wait in PortFull. - name = 'SourceFromSingleChunk', output_wait_elapsed_us>=1e6, - -- NullSource/LazyOutputFormatLazyOutputFormat are the outputs - -- so they cannot starts to execute before sleep(1) will be executed. 
- input_wait_elapsed_us>=1e6) - elapsed, - input_rows, - input_bytes, - output_rows, - output_bytes -FROM system.processors_profile_log -WHERE query_id = query_id_ -ORDER BY name; ExpressionTransform 1 1 1 1 1 LazyOutputFormat 1 1 1 0 0 LimitsCheckingTransform 1 1 1 1 1 diff --git a/tests/queries/0_stateless/02210_processors_profile_log.sql b/tests/queries/0_stateless/02210_processors_profile_log.sql index a15ed26fd67..59edbb71457 100644 --- a/tests/queries/0_stateless/02210_processors_profile_log.sql +++ b/tests/queries/0_stateless/02210_processors_profile_log.sql @@ -1,4 +1,3 @@ --- { echo } EXPLAIN PIPELINE SELECT sleep(1); SELECT sleep(1) SETTINGS log_processors_profiles=true, log_queries=1, log_queries_min_type='QUERY_FINISH'; @@ -15,13 +14,13 @@ SELECT multiIf( -- ExpressionTransform executes sleep(), -- so IProcessor::work() will spend 1 sec. - name = 'ExpressionTransform', elapsed_us>=1e6, + name = 'ExpressionTransform', elapsed_us>=1e6 ? 1 : elapsed_us, -- SourceFromSingleChunk, that feed data to ExpressionTransform, -- will feed first block and then wait in PortFull. - name = 'SourceFromSingleChunk', output_wait_elapsed_us>=1e6, + name = 'SourceFromSingleChunk', output_wait_elapsed_us>=1e6 ? 1 : output_wait_elapsed_us, -- NullSource/LazyOutputFormatLazyOutputFormat are the outputs -- so they cannot starts to execute before sleep(1) will be executed. - input_wait_elapsed_us>=1e6) + input_wait_elapsed_us>=1e6 ? 1 : input_wait_elapsed_us) elapsed, input_rows, input_bytes, diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference b/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference index 93b6d4de94f..6b5dd182112 100644 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference @@ -34,3 +34,21 @@ DOWNLOADED 0 79 80 DOWNLOADED 0 745 746 2 Expect no cache +Using storage policy: azure_cache +0 +Expect cache +DOWNLOADED 0 0 1 +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +3 +Expect cache +DOWNLOADED 0 0 1 +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +3 +Expect no cache +Expect cache +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +2 +Expect no cache diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh index 9aa631c5d0a..57b8cec7864 100755 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -for STORAGE_POLICY in 's3_cache' 'local_cache'; do +for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do echo "Using storage policy: $STORAGE_POLICY" ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference index 186dcc1eeb2..f53f00992e7 100644 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference @@ -218,3 +218,113 @@ SELECT count() FROM test_02241 5010500 SELECT count() FROM test_02241 WHERE value LIKE '%010%' 18816 +Using storage policy: azure_cache +DROP TABLE IF EXISTS test_02241 +CREATE TABLE test_02241 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='azure_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization = 1 +SYSTEM STOP MERGES test_02241 +SYSTEM DROP FILESYSTEM CACHE +SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path +0 +SELECT count(), sum(size) FROM system.filesystem_cache +0 0 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) +SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical +Row 1: +────── +file_segment_range_begin: 0 +file_segment_range_end: 745 +size: 746 +state: DOWNLOADED +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path +8 +SELECT count(), sum(size) FROM system.filesystem_cache +8 1100 +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0 +0 +SELECT * FROM test_02241 FORMAT Null +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0 +2 +SELECT * FROM test_02241 FORMAT Null +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0 +2 +SELECT count(), sum(size) size FROM system.filesystem_cache +8 1100 +SYSTEM DROP FILESYSTEM CACHE +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100, 200) +SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, 
local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical; +Row 1: +────── +file_segment_range_begin: 0 +file_segment_range_end: 1659 +size: 1660 +state: DOWNLOADED +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path +8 +SELECT count(), sum(size) FROM system.filesystem_cache +8 2014 +SELECT count(), sum(size) FROM system.filesystem_cache +8 2014 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0 +SELECT count(), sum(size) FROM system.filesystem_cache +8 2014 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(300, 10000) +SELECT count(), sum(size) FROM system.filesystem_cache +24 84045 +SYSTEM START MERGES test_02241 +OPTIMIZE TABLE test_02241 FINAL +SELECT count(), sum(size) FROM system.filesystem_cache +32 167243 +ALTER TABLE test_02241 UPDATE value = 'kek' WHERE key = 100 +SELECT count(), sum(size) FROM system.filesystem_cache +41 250541 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000) +SYSTEM FLUSH LOGS +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000) 0 +SELECT count() FROM test_02241 +5010500 +SELECT count() FROM test_02241 WHERE value LIKE '%010%' +18816 diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh index c1d930f54a7..1028fba76f5 100755 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -for STORAGE_POLICY in 's3_cache' 'local_cache'; do +for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do echo "Using storage policy: $STORAGE_POLICY" $CLICKHOUSE_CLIENT --echo --query "DROP TABLE IF EXISTS test_02241" diff --git a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference index 99f31df7def..447e1a275fc 100644 --- a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference +++ b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference @@ -6,3 +6,7 @@ Using storage policy: local_cache (0,519) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE (0,808110) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE (0,808110) READ_FROM_CACHE +Using storage policy: azure_cache +(0,519) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE +(0,808110) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE +(0,808110) READ_FROM_CACHE diff --git a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh index 4c92d1d2954..7a665d81eab 100755 --- a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh +++ b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -for STORAGE_POLICY in 's3_cache' 'local_cache'; do +for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do echo "Using storage policy: $STORAGE_POLICY" $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" diff --git a/tests/queries/0_stateless/02265_column_ttl.sql b/tests/queries/0_stateless/02265_column_ttl.sql index 16ae2da2a2b..ac64dd9457a 100644 --- a/tests/queries/0_stateless/02265_column_ttl.sql +++ b/tests/queries/0_stateless/02265_column_ttl.sql @@ -16,7 +16,8 @@ insert into ttl_02265 values ('2010-01-01', 2010, 'foo'); optimize table ttl_02265 final; -- after, 20100101_0_0_2 will not have ttl.txt, but will have value.bin optimize table ttl_02265 final; -system sync replica ttl_02265; +system sync replica ttl_02265 STRICT; +system sync replica ttl_02265_r2 STRICT; -- after detach/attach it will not have TTL in-memory, and will not have ttl.txt detach table ttl_02265; diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.reference b/tests/queries/0_stateless/02286_drop_filesystem_cache.reference index b4e5b6715de..e3875dbabe1 100644 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.reference +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.reference @@ -16,3 +16,12 @@ Using storage policy: local_cache 1 1 0 +Using storage policy: azure_cache +0 +2 +0 +1 +1 +1 +1 +0 diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh index 1e1841862e9..a2c9352b7aa 100755 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -for STORAGE_POLICY in 's3_cache' 'local_cache'; do +for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do echo "Using storage policy: $STORAGE_POLICY" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_02286" diff --git a/tests/queries/0_stateless/02302_column_decl_null_before_defaul_value.sql b/tests/queries/0_stateless/02302_column_decl_null_before_defaul_value.sql index 3825df1e557..a2c2fc7cba2 100644 --- a/tests/queries/0_stateless/02302_column_decl_null_before_defaul_value.sql +++ b/tests/queries/0_stateless/02302_column_decl_null_before_defaul_value.sql @@ -56,6 +56,6 @@ ALTER TABLE null_before ALTER COLUMN id TYPE INT NULL; -- { clientError SYNTAX_E select 'modify column, NULL modifier is not allowed'; DROP TABLE IF EXISTS null_before SYNC; CREATE TABLE null_before (id INT NOT NULL) ENGINE=MergeTree() ORDER BY tuple(); -ALTER TABLE null_before MODIFY COLUMN id NULL DEFAULT 1; -- { serverError UNKNOWN_TYPE } +ALTER TABLE null_before MODIFY COLUMN id NULL DEFAULT 1; -- { clientError SYNTAX_ERROR } DROP TABLE IF EXISTS null_before SYNC; diff --git a/tests/queries/0_stateless/02313_filesystem_cache_seeks.reference b/tests/queries/0_stateless/02313_filesystem_cache_seeks.reference index 062aac259a4..0a9e1c20b59 100644 --- a/tests/queries/0_stateless/02313_filesystem_cache_seeks.reference +++ b/tests/queries/0_stateless/02313_filesystem_cache_seeks.reference @@ -1,3 +1,4 @@ Using storage policy: s3_cache Using storage policy: local_cache Using storage policy: s3_cache_multi +Using storage policy: azure_cache diff --git a/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh b/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh index f5de4346fd6..fbaec1ffaa7 100755 --- 
a/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh +++ b/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh @@ -8,7 +8,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh -for STORAGE_POLICY in 's3_cache' 'local_cache' 's3_cache_multi'; do +for STORAGE_POLICY in 's3_cache' 'local_cache' 's3_cache_multi' 'azure_cache'; do echo "Using storage policy: $STORAGE_POLICY" $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" diff --git a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh index b100f96befa..bd018018789 100755 --- a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh +++ b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh @@ -16,7 +16,7 @@ function check_refcnt_for_table() -- queue may hold the parts lock for awhile as well system stop pulling replication log $table; " - $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into $table select number, number%4 from numbers(200)" + $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into $table select number, number%4 from numbers(2000)" local query_id query_id="$table-$(random_str 10)" @@ -35,7 +35,7 @@ function check_refcnt_for_table() ) # Notes: - # - query may sleep 0.1*(200/4)=5 seconds maximum, it is enough to check system.parts + # - query may sleep 0.1*(2000/4)=50 seconds maximum, it is enough to check system.parts # - "part = 1" condition should prune all parts except first # - max_block_size=1 with index_granularity=1 will allow to cancel the query earlier $CLICKHOUSE_CLIENT "${args[@]}" -q "select sleepEachRow(0.1) from $table where part = 1" & diff --git a/tests/queries/0_stateless/02344_describe_cache.reference b/tests/queries/0_stateless/02344_describe_cache.reference index db8182e30bb..6895606eb2b 100644 --- a/tests/queries/0_stateless/02344_describe_cache.reference +++ b/tests/queries/0_stateless/02344_describe_cache.reference @@ -1,2 +1,2 @@ 1 -102400 10000000 33554432 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/02344_describe_cache_test 5 5000 0 16 +102400 10000000 33554432 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/02344_describe_cache_test 0 5000 0 16 diff --git a/tests/queries/0_stateless/02366_kql_create_table.sql b/tests/queries/0_stateless/02366_kql_create_table.sql index b266679b06a..75a81c5dbd3 100644 --- a/tests/queries/0_stateless/02366_kql_create_table.sql +++ b/tests/queries/0_stateless/02366_kql_create_table.sql @@ -1,8 +1,8 @@ DROP TABLE IF EXISTS Customers; CREATE TABLE Customers -( +( FirstName Nullable(String), - LastName String, + LastName String, Occupation String, Education String, Age Nullable(UInt8) @@ -10,20 +10,20 @@ CREATE TABLE Customers INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); Select '-- test create table --' ; -Select * from kql(Customers|project FirstName) limit 1;; +Select * from kql($$Customers|project FirstName$$) limit 1;; DROP TABLE IF EXISTS kql_table1; -CREATE TABLE kql_table1 ENGINE = Memory AS select *, now() as new_column From kql(Customers | project LastName | filter LastName=='Diaz'); +CREATE TABLE kql_table1 ENGINE = Memory AS select *, now() as new_column From 
kql($$Customers | project LastName | filter LastName=='Diaz'$$); select LastName from kql_table1 limit 1; DROP TABLE IF EXISTS kql_table2; CREATE TABLE kql_table2 -( +( FirstName Nullable(String), - LastName String, + LastName String, Age Nullable(UInt8) ) ENGINE = Memory; -INSERT INTO kql_table2 select * from kql(Customers|project FirstName,LastName,Age | filter FirstName=='Theodore'); +INSERT INTO kql_table2 select * from kql($$Customers|project FirstName,LastName,Age | filter FirstName=='Theodore'$$); select * from kql_table2 limit 1; --- select * from kql(Customers | where FirstName !in ("test", "test2")); +-- select * from kql($$Customers | where FirstName !in ("test", "test2")$$); DROP TABLE IF EXISTS Customers; DROP TABLE IF EXISTS kql_table1; -DROP TABLE IF EXISTS kql_table2; \ No newline at end of file +DROP TABLE IF EXISTS kql_table2; diff --git a/tests/queries/0_stateless/02391_recursive_buffer.sql b/tests/queries/0_stateless/02391_recursive_buffer.sql index 1a630722b5a..60a2f0d1af1 100644 --- a/tests/queries/0_stateless/02391_recursive_buffer.sql +++ b/tests/queries/0_stateless/02391_recursive_buffer.sql @@ -10,9 +10,5 @@ DROP TABLE test; DROP TABLE IF EXISTS test1; DROP TABLE IF EXISTS test2; CREATE TABLE test1 (key UInt32) Engine = Buffer(currentDatabase(), test2, 16, 10, 100, 10000, 1000000, 10000000, 100000000); -CREATE TABLE test2 (key UInt32) Engine = Buffer(currentDatabase(), test1, 16, 10, 100, 10000, 1000000, 10000000, 100000000); -SELECT * FROM test1; -- { serverError TOO_DEEP_RECURSION } -SELECT * FROM test2; -- { serverError TOO_DEEP_RECURSION } -SELECT * FROM system.tables WHERE table IN ('test1', 'test2') AND database = currentDatabase(); -- { serverError TOO_DEEP_RECURSION } +CREATE TABLE test2 (key UInt32) Engine = Buffer(currentDatabase(), test1, 16, 10, 100, 10000, 1000000, 10000000, 100000000); -- { serverError INFINITE_LOOP } DROP TABLE test1; -DROP TABLE test2; diff --git a/tests/queries/0_stateless/02403_big_http_chunk_size.python b/tests/queries/0_stateless/02403_big_http_chunk_size.python index 3213b8cd387..f74459489a5 100644 --- a/tests/queries/0_stateless/02403_big_http_chunk_size.python +++ b/tests/queries/0_stateless/02403_big_http_chunk_size.python @@ -16,7 +16,7 @@ def main(): sock.settimeout(60) s = "POST / HTTP/1.1\r\n" s += "Host: %s\r\n" % host - s += "Content-type: multipart/form-data\r\n" + s += "Content-type: multipart/form-data; boundary=--b3f1zid8kqwy\r\n" s += "Transfer-encoding: chunked\r\n" s += "\r\n" s += "ffffffffffffffff" diff --git a/tests/queries/0_stateless/02403_big_http_chunk_size.reference b/tests/queries/0_stateless/02403_big_http_chunk_size.reference index d7970bd2eb1..466ff9002e9 100644 --- a/tests/queries/0_stateless/02403_big_http_chunk_size.reference +++ b/tests/queries/0_stateless/02403_big_http_chunk_size.reference @@ -1,3 +1,3 @@ -HTTP/1.1 200 OK +HTTP/1.1 500 Internal Server Error encoding type chunked -error code 1000 +error code 69 diff --git a/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.reference b/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.reference index 01d7fa2a2cb..5eb4670f3cf 100644 --- a/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.reference +++ b/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.reference @@ -18,3 +18,73 @@ QUERY id: 0 LIST id: 9, nodes: 1 CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 1 +QUERY id: 0 + PROJECTION COLUMNS + 
uniqCombined((materialize((number)))) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: uniqCombined, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + JOIN TREE + TABLE_FUNCTION id: 5, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 6, nodes: 1 + CONSTANT id: 7, constant_value: UInt64_10, constant_value_type: UInt8 +10 +QUERY id: 0 + PROJECTION COLUMNS + uniq(abs(number)) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: uniq, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: abs, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 8, nodes: 1 + CONSTANT id: 9, constant_value: UInt64_10, constant_value_type: UInt8 +QUERY id: 0 + PROJECTION COLUMNS + uniq(toString(abs(materialize(number)))) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: uniq, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: abs, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + FUNCTION id: 6, function_name: materialize, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + JOIN TREE + TABLE_FUNCTION id: 9, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 10, nodes: 1 + CONSTANT id: 11, constant_value: UInt64_10, constant_value_type: UInt8 +QUERY id: 0 + PROJECTION COLUMNS + uniq((number, 1)) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: uniq, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: tuple, function_type: ordinary, result_type: Tuple(UInt64, UInt8) + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 8, constant_value: UInt64_1, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 9, nodes: 1 + CONSTANT id: 10, constant_value: UInt64_10, constant_value_type: UInt8 diff --git a/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.sql b/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.sql index 830db274678..5a3b2379fde 100644 --- a/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.sql +++ b/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.sql @@ -1,5 +1,14 @@ -SET allow_experimental_analyzer = 1; +SET allow_experimental_analyzer = 1, optimize_injective_functions_inside_uniq = 1; +-- Simple test EXPLAIN QUERY TREE SELECT uniqCombined(tuple('')) FROM numbers(1); - SELECT uniqCombined(tuple('')) FROM numbers(1); + +-- Test with chain of injective functions +EXPLAIN QUERY TREE SELECT uniqCombined(tuple(materialize(tuple(number)))) FROM numbers(10); +SELECT uniqCombined(tuple(materialize(toString(number)))) FROM numbers(10); + +-- No or partial optimization cases +EXPLAIN QUERY TREE SELECT uniq(abs(number)) FROM numbers(10); -- no elimination as `abs` is not injective +EXPLAIN QUERY TREE SELECT 
uniq(toString(abs(materialize(number)))) FROM numbers(10); -- only eliminate `toString` +EXPLAIN QUERY TREE SELECT uniq(tuple(number, 1)) FROM numbers(10); -- no elimination as `tuple` has multiple arguments diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference index 77ef213b36d..4a4e898c5bd 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -332,13 +332,12 @@ SETTINGS optimize_aggregators_of_group_by_keys=0 -- avoid removing any() as it d Expression (Projection) Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) - Filter ((WHERE + (Projection + Before ORDER BY))) - Filter (HAVING) - Aggregating - Expression ((Before GROUP BY + Projection)) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - ReadFromSystemNumbers + Filter (((WHERE + (Projection + Before ORDER BY)) + HAVING)) + Aggregating + Expression ((Before GROUP BY + Projection)) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + ReadFromSystemNumbers -- execute 1 2 diff --git a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference index 9bb0c022752..70bcd7f255b 100644 --- a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference +++ b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference @@ -29,20 +29,16 @@ WHERE type_1 = \'all\' ExpressionTransform × 2 (Filter) FilterTransform × 2 - (Filter) - FilterTransform × 2 - (Filter) - FilterTransform × 2 - (Aggregating) - ExpressionTransform × 2 - AggregatingTransform × 2 - Copy 1 → 2 - (Expression) - ExpressionTransform - (Expression) - ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + (Aggregating) + ExpressionTransform × 2 + AggregatingTransform × 2 + Copy 1 → 2 + (Expression) + ExpressionTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform × 2 (Filter) @@ -68,14 +64,10 @@ ExpressionTransform × 2 ExpressionTransform × 2 AggregatingTransform × 2 Copy 1 → 2 - (Filter) - FilterTransform - (Filter) - FilterTransform - (Expression) - ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform × 2 (Aggregating) diff --git a/tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.reference b/tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.reference index dd107065380..d391c365ea7 100644 --- a/tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.reference +++ b/tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.reference @@ -1,10 +1,11 @@ Expression ((Project names + (Projection + ))) Header: number UInt64 Actions: INPUT : 0 -> __table2.number UInt64 : 0 - ALIAS __table2.number :: 0 -> number UInt64 : 1 - ALIAS number :: 1 -> __table1.number UInt64 : 0 - ALIAS __table1.number :: 0 -> number UInt64 : 1 -Positions: 1 + INPUT :: 1 -> ignore(2_UInt8) UInt8 : 1 + ALIAS __table2.number :: 0 -> number UInt64 : 2 + ALIAS 
number :: 2 -> __table1.number UInt64 : 0 + ALIAS __table1.number :: 0 -> number UInt64 : 2 +Positions: 2 Sorting (Sorting for ORDER BY) Header: ignore(2_UInt8) UInt8 __table2.number UInt64 diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql b/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql index ff8b9c71e92..741d0177971 100644 --- a/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql +++ b/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql @@ -1,4 +1,4 @@ --- Tags: long, no-debug, no-tsan, no-asan, no-ubsan, no-msan, no-parallel +-- Tags: long, no-debug, no-tsan, no-asan, no-ubsan, no-msan, no-parallel, no-sanitize-coverage -- no-parallel because the sets use a lot of memory, which may interfere with other tests diff --git a/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh b/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh index 912cdd3d1e8..c69c635f6ed 100755 --- a/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh +++ b/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh @@ -14,6 +14,7 @@ SETTINGS disk = disk(type = cache, max_size = '1Gi', max_file_segment_size = '40Mi', boundary_alignment = '20Mi', + background_download_threads = 2, path = '$CLICKHOUSE_TEST_UNIQUE_NAME', disk = 's3_disk'); diff --git a/tests/queries/0_stateless/02859_replicated_db_name_zookeeper.reference b/tests/queries/0_stateless/02859_replicated_db_name_zookeeper.reference new file mode 100644 index 00000000000..e7d63a6add3 --- /dev/null +++ b/tests/queries/0_stateless/02859_replicated_db_name_zookeeper.reference @@ -0,0 +1,2 @@ +rdb1_default 1 +rdb3_default 1 diff --git a/tests/queries/0_stateless/02859_replicated_db_name_zookeeper.sh b/tests/queries/0_stateless/02859_replicated_db_name_zookeeper.sh new file mode 100755 index 00000000000..3c14c569257 --- /dev/null +++ b/tests/queries/0_stateless/02859_replicated_db_name_zookeeper.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "CREATE DATABASE rdb1_$CLICKHOUSE_DATABASE ON CLUSTER test_shard_localhost ENGINE=Replicated('/clickhouse/databases/{uuid}', '{shard}', '{replica}')"; +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "CREATE DATABASE rdb2_$CLICKHOUSE_DATABASE ON CLUSTER test_shard_localhost ENGINE=Replicated('/clickhouse/databases/{uuid}', '{shard}', '{replica}')"; +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "RENAME DATABASE rdb2_$CLICKHOUSE_DATABASE to rdb3_$CLICKHOUSE_DATABASE ON CLUSTER test_shard_localhost" + +$CLICKHOUSE_CLIENT -q " +SELECT + db_name, + t1.uuid = t2.uuid +FROM +( + WITH '/clickhouse/databases/' AS prefix + SELECT + toUUID(substr(path, length(prefix) + 1)) AS uuid, + value AS db_name + FROM system.zookeeper + WHERE (path IN ( + SELECT concat(path, name) + FROM system.zookeeper + WHERE path = prefix + )) AND (name = 'first_replica_database_name') +) AS t1 +INNER JOIN system.databases AS t2 USING (uuid) +WHERE db_name like '%$CLICKHOUSE_DATABASE%' +ORDER BY db_name +" + +$CLICKHOUSE_CLIENT -q "DROP DATABASE rdb1_$CLICKHOUSE_DATABASE" +$CLICKHOUSE_CLIENT -q "DROP DATABASE rdb3_$CLICKHOUSE_DATABASE" diff --git a/tests/queries/0_stateless/02864_statistics_uniq.sql b/tests/queries/0_stateless/02864_statistics_uniq.sql index c6b51d2a377..d496392668b 100644 --- a/tests/queries/0_stateless/02864_statistics_uniq.sql +++ b/tests/queries/0_stateless/02864_statistics_uniq.sql @@ -2,6 +2,7 @@ DROP TABLE IF EXISTS t1; SET allow_experimental_statistics = 1; SET allow_statistics_optimize = 1; +SET mutations_sync = 1; CREATE TABLE t1 ( diff --git a/tests/queries/0_stateless/02867_page_cache.reference b/tests/queries/0_stateless/02867_page_cache.reference deleted file mode 100644 index c3d6484a175..00000000000 --- a/tests/queries/0_stateless/02867_page_cache.reference +++ /dev/null @@ -1,21 +0,0 @@ -cold read 54975576145920 -PageCacheBytesUnpinnedRoundedToHugePages 1 -PageCacheBytesUnpinnedRoundedToPages 1 -PageCacheChunkMisses 1 -ReadBufferFromS3Bytes 1 -repeat read 1 54975576145920 -PageCacheBytesUnpinnedRoundedToHugePages 1 -PageCacheBytesUnpinnedRoundedToPages 1 -PageCacheChunkDataHits 1 -dropped and bypassed cache 54975576145920 -PageCacheChunkMisses 1 -ReadBufferFromS3Bytes 1 -repeat read 2 54975576145920 -PageCacheBytesUnpinnedRoundedToHugePages 1 -PageCacheBytesUnpinnedRoundedToPages 1 -PageCacheChunkMisses 1 -ReadBufferFromS3Bytes 1 -repeat read 3 54975576145920 -PageCacheBytesUnpinnedRoundedToHugePages 1 -PageCacheBytesUnpinnedRoundedToPages 1 -PageCacheChunkDataHits 1 diff --git a/tests/queries/0_stateless/02867_page_cache.sql b/tests/queries/0_stateless/02867_page_cache.sql deleted file mode 100644 index f1882de4af6..00000000000 --- a/tests/queries/0_stateless/02867_page_cache.sql +++ /dev/null @@ -1,106 +0,0 @@ --- Tags: no-fasttest, no-parallel --- no-fasttest because we need an S3 storage policy --- no-parallel because we look at server-wide counters about page cache usage - -set use_page_cache_for_disks_without_file_cache = 1; -set page_cache_inject_eviction = 0; -set enable_filesystem_cache = 0; -set use_uncompressed_cache = 0; - -create table events_snapshot engine Memory as select * from system.events; -create view events_diff as - -- round all stats to 70 MiB to leave a lot of leeway for overhead - with if(event like '%Bytes%', 70*1024*1024, 35) as granularity, - -- cache hits counter can vary a lot depending on other settings: - -- e.g. 
if merge_tree_min_bytes_for_concurrent_read is small, multiple threads will read each chunk - -- so we just check that the value is not too low - if(event in ( - 'PageCacheBytesUnpinnedRoundedToPages', 'PageCacheBytesUnpinnedRoundedToHugePages', - 'PageCacheChunkDataHits'), 1, 1000) as clamp - select event, min2(intDiv(new.value - old.value, granularity), clamp) as diff - from system.events new - left outer join events_snapshot old - on old.event = new.event - where diff != 0 and - event in ( - 'ReadBufferFromS3Bytes', 'PageCacheChunkMisses', 'PageCacheChunkDataMisses', - 'PageCacheChunkDataHits', 'PageCacheChunkDataPartialHits', - 'PageCacheBytesUnpinnedRoundedToPages', 'PageCacheBytesUnpinnedRoundedToHugePages') - order by event; - -drop table if exists page_cache_03055; -create table page_cache_03055 (k Int64 CODEC(NONE)) engine MergeTree order by k settings storage_policy = 's3_cache'; - --- Write an 80 MiB file (40 x 2 MiB chunks), and a few small files. -system stop merges page_cache_03055; -insert into page_cache_03055 select * from numbers(10485760) settings max_block_size=100000000, preferred_block_size_bytes=1000000000; - -select * from events_diff; -truncate table events_snapshot; -insert into events_snapshot select * from system.events; - -system start merges page_cache_03055; -optimize table page_cache_03055 final; -truncate table events_snapshot; -insert into events_snapshot select * from system.events; - --- Cold read, should miss cache. (Populating cache on write is not implemented yet.) - -select 'cold read', sum(k) from page_cache_03055; - -select * from events_diff where event not in ('PageCacheChunkDataHits'); -truncate table events_snapshot; -insert into events_snapshot select * from system.events; - --- Repeat read, should hit cache. - -select 'repeat read 1', sum(k) from page_cache_03055; - -select * from events_diff; -truncate table events_snapshot; -insert into events_snapshot select * from system.events; - --- Drop cache and read again, should miss. Also don't write to cache. - -system drop page cache; - -select 'dropped and bypassed cache', sum(k) from page_cache_03055 settings read_from_page_cache_if_exists_otherwise_bypass_cache = 1; - --- Data could be read multiple times because we're not writing to cache. --- (Not checking PageCacheBytesUnpinned* because it's unreliable in this case because of an intentional race condition, see PageCache::evictChunk.) -select event, if(event in ('PageCacheChunkMisses', 'ReadBufferFromS3Bytes'), diff >= 1, diff) from events_diff where event not in ('PageCacheChunkDataHits', 'PageCacheBytesUnpinnedRoundedToPages', 'PageCacheBytesUnpinnedRoundedToHugePages'); -truncate table events_snapshot; -insert into events_snapshot select * from system.events; - --- Repeat read, should still miss, but populate cache. - -select 'repeat read 2', sum(k) from page_cache_03055; - -select * from events_diff where event not in ('PageCacheChunkDataHits'); -truncate table events_snapshot; -insert into events_snapshot select * from system.events; - --- Read again, hit the cache. - -select 'repeat read 3', sum(k) from page_cache_03055 settings read_from_page_cache_if_exists_otherwise_bypass_cache = 1; - -select * from events_diff; -truncate table events_snapshot; -insert into events_snapshot select * from system.events; - - --- Known limitation: cache is not invalidated if a table is dropped and created again at the same path. 
--- set allow_deprecated_database_ordinary=1; --- create database test_03055 engine = Ordinary; --- create table test_03055.t (k Int64) engine MergeTree order by k settings storage_policy = 's3_cache'; --- insert into test_03055.t values (1); --- select * from test_03055.t; --- drop table test_03055.t; --- create table test_03055.t (k Int64) engine MergeTree order by k settings storage_policy = 's3_cache'; --- insert into test_03055.t values (2); --- select * from test_03055.t; - - -drop table events_snapshot; -drop table page_cache_03055; -drop view events_diff; diff --git a/tests/queries/0_stateless/02882_clickhouse_keeper_client_no_confirmation.sh b/tests/queries/0_stateless/02882_clickhouse_keeper_client_no_confirmation.sh index 4bda0cfa5b0..43f86b8a58a 100755 --- a/tests/queries/0_stateless/02882_clickhouse_keeper_client_no_confirmation.sh +++ b/tests/queries/0_stateless/02882_clickhouse_keeper_client_no_confirmation.sh @@ -6,8 +6,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) path="/test-keeper-client-$CLICKHOUSE_DATABASE" -$CLICKHOUSE_KEEPER_CLIENT -q "rm $path" >& /dev/null +$CLICKHOUSE_KEEPER_CLIENT -q "rm '$path'" >& /dev/null -$CLICKHOUSE_KEEPER_CLIENT -q "create $path 'foobar'" -$CLICKHOUSE_KEEPER_CLIENT -q "rmr $path" -$CLICKHOUSE_KEEPER_CLIENT -q "get $path" 2>&1 +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path' 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "rmr '$path'" +$CLICKHOUSE_KEEPER_CLIENT -q "get '$path'" 2>&1 diff --git a/tests/queries/0_stateless/02883_zookeeper_finalize_stress.sh b/tests/queries/0_stateless/02883_zookeeper_finalize_stress.sh index dc7d67fbdd4..c883cd8f58a 100755 --- a/tests/queries/0_stateless/02883_zookeeper_finalize_stress.sh +++ b/tests/queries/0_stateless/02883_zookeeper_finalize_stress.sh @@ -7,4 +7,4 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -yes /keeper/api_version | head -n1000 | xargs -P30 -i $CLICKHOUSE_KEEPER_CLIENT -q 'get {}' > /dev/null +yes /keeper/api_version | head -n1000 | xargs -P30 -i $CLICKHOUSE_KEEPER_CLIENT -q "get '{}'" > /dev/null diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference index 0589fdeef04..a03343c8cb3 100644 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference @@ -35,5 +35,8 @@ OK 2 2 6 6 9 9 +===== TestInsertChain ===== 1000 1000 +===== TestOnCluster ===== +1 diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh index f32aee44bee..dd869cd9988 100755 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh @@ -260,6 +260,8 @@ EOF ${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT * FROM $db.test_mv_row_2" +echo "===== TestInsertChain =====" + ${CLICKHOUSE_CLIENT} --multiquery </g" || : echo "tables:" - $CLICKHOUSE_KEEPER_CLIENT -q "ls /test/02922/${CLICKHOUSE_DATABASE}" | grep -o "table" || : + $CLICKHOUSE_KEEPER_CLIENT -q "ls '/test/02922/${CLICKHOUSE_DATABASE}'" | grep -o "table" || : } list_keeper_nodes "${table_shared_id}" diff --git a/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.reference b/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.reference index 3124698d218..f9b72ba9c6a 100644 --- a/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.reference +++ b/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.reference @@ -245,21 +245,21 @@ EXPLAIN SYNTAX (SELECT 2 * count(uint64) - sum(uint64) From test_table); SELECT (2 * count(uint64)) - sum(uint64) FROM test_table SELECT sum(float64 + 2) From test_table; -26.5 +26.875 SELECT sum(2 + float64) From test_table; -26.5 +26.875 SELECT sum(float64 - 2) From test_table; -6.5 +6.875 SELECT sum(2 - float64) From test_table; --6.5 +-6.875 SELECT sum(float64) + 2 * count(float64) From test_table; -26.5 +26.875 SELECT 2 * count(float64) + sum(float64) From test_table; -26.5 +26.875 SELECT sum(float64) - 2 * count(float64) From test_table; -6.5 +6.875 SELECT 2 * count(float64) - sum(float64) From test_table; --6.5 +-6.875 EXPLAIN SYNTAX (SELECT sum(float64 + 2) From test_table); SELECT sum(float64) + (2 * count(float64)) FROM test_table @@ -375,25 +375,25 @@ EXPLAIN SYNTAX (SELECT (2 * count(uint64) - sum(uint64)) + (3 * count(uint64) - SELECT ((2 * count(uint64)) - sum(uint64)) + ((3 * count(uint64)) - sum(uint64)) FROM test_table SELECT sum(float64 + 2) + sum(float64 + 3) From test_table; -58 +58.75 SELECT sum(float64 + 2) - sum(float64 + 3) From test_table; -5 SELECT sum(float64 - 2) + sum(float64 - 3) From test_table; -8 +8.75 SELECT sum(float64 - 2) - sum(float64 - 3) From test_table; 5 SELECT sum(2 - float64) - sum(3 - float64) From test_table; -5 SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table; -58 +58.75 SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table; -5 SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table; -8 +8.75 SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * 
count(float64)) From test_table; 5 SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table; --8 +-8.75 EXPLAIN SYNTAX (SELECT sum(float64 + 2) + sum(float64 + 3) From test_table); SELECT (sum(float64) + (2 * count(float64))) + (sum(float64) + (3 * count(float64))) FROM test_table diff --git a/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.sql b/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.sql index c7b0ff82442..94baee6f1ba 100644 --- a/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.sql +++ b/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.sql @@ -23,11 +23,12 @@ CREATE TABLE test_table decimal32 Decimal32(5), ) ENGINE=MergeTree ORDER BY uint64; -INSERT INTO test_table VALUES (1, 1.1, 1.11); -INSERT INTO test_table VALUES (2, 2.2, 2.22); -INSERT INTO test_table VALUES (3, 3.3, 3.33); -INSERT INTO test_table VALUES (4, 4.4, 4.44); -INSERT INTO test_table VALUES (5, 5.5, 5.55); +-- Use Float64 numbers divisible by 1/16 (or some other small power of two), so that their sum doesn't depend on summation order. +INSERT INTO test_table VALUES (1, 1.125, 1.11); +INSERT INTO test_table VALUES (2, 2.250, 2.22); +INSERT INTO test_table VALUES (3, 3.375, 3.33); +INSERT INTO test_table VALUES (4, 4.500, 4.44); +INSERT INTO test_table VALUES (5, 5.625, 5.55); -- { echoOn } SELECT sum(uint64 + 1 AS i) from test_table where i > 0; diff --git a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference index 4a6bc8498e1..298cc908178 100644 --- a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference +++ b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference @@ -1,20 +1,20 @@ -100 10 10 10 0 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 16 +100 10 10 10 0 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 0 10 98 set max_size from 100 to 10 -10 10 10 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 16 +10 10 10 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 1 8 set max_size from 10 to 100 -100 10 10 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 16 +100 10 10 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 10 98 set max_elements from 10 to 2 -100 2 10 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 16 +100 2 10 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 2 18 set max_elements from 2 to 10 -100 10 10 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 16 +100 10 10 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 10 98 diff --git a/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh b/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh index 45e65b18e07..b1d1c483396 100755 --- a/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh +++ b/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-ordinary-database, use-rocksdb +# Tags: no-ordinary-database, use-rocksdb, no-random-settings CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -45,4 +45,3 @@ ${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number, number+1 F ${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm 
SELECT number, number+1 FROM numbers_mt(1000000)" & wait ${CLICKHOUSE_CLIENT} --query "SELECT count() FROM rocksdb_worm;" - diff --git a/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql b/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql index 8f67cd7e030..7ebef866360 100644 --- a/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql +++ b/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql @@ -22,7 +22,7 @@ create table test (a Int32) engine = MergeTree() order by tuple() settings disk=disk(name='test2', type = object_storage, object_storage_type = s3, - metadata_storage_type = local, + metadata_type = local, endpoint = 'http://localhost:11111/test/common/', access_key_id = clickhouse, secret_access_key = clickhouse); @@ -32,7 +32,7 @@ create table test (a Int32) engine = MergeTree() order by tuple() settings disk=disk(name='test3', type = object_storage, object_storage_type = s3, - metadata_storage_type = local, + metadata_type = local, metadata_keep_free_space_bytes = 1024, endpoint = 'http://localhost:11111/test/common/', access_key_id = clickhouse, @@ -43,7 +43,7 @@ create table test (a Int32) engine = MergeTree() order by tuple() settings disk=disk(name='test4', type = object_storage, object_storage_type = s3, - metadata_storage_type = local, + metadata_type = local, metadata_keep_free_space_bytes = 0, endpoint = 'http://localhost:11111/test/common/', access_key_id = clickhouse, diff --git a/tests/queries/0_stateless/02982_aggregation_states_destruction.reference b/tests/queries/0_stateless/02982_aggregation_states_destruction.reference index d00491fd7e5..72749c905a3 100644 --- a/tests/queries/0_stateless/02982_aggregation_states_destruction.reference +++ b/tests/queries/0_stateless/02982_aggregation_states_destruction.reference @@ -1 +1 @@ -1 +1 1 1 diff --git a/tests/queries/0_stateless/02982_aggregation_states_destruction.sh b/tests/queries/0_stateless/02982_aggregation_states_destruction.sh index 1c72cf2b8c1..263a4535c0e 100755 --- a/tests/queries/0_stateless/02982_aggregation_states_destruction.sh +++ b/tests/queries/0_stateless/02982_aggregation_states_destruction.sh @@ -11,4 +11,4 @@ $CLICKHOUSE_CLIENT --query_id $query_id --log_query_threads 1 --query="select nu $CLICKHOUSE_CLIENT -q "system flush logs;" -$CLICKHOUSE_CLIENT -q "select count() > 1 from system.query_thread_log where query_id = '$query_id' and current_database = currentDatabase() and thread_name = 'AggregDestruct';" +$CLICKHOUSE_CLIENT -q "select count() > 0, (countIf(thread_name = 'AggregDestruct') as aggs) > 0, aggs > 1 from system.query_thread_log where query_id = '$query_id' and current_database = currentDatabase();" diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql index 34ba034f798..d8b5ebb3148 100644 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql +++ b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql @@ -4,9 +4,9 @@ drop table if exists rmt_master; drop table if exists rmt_slave; -create table rmt_master (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'master') order by tuple() settings always_fetch_merged_part=0; +create table rmt_master (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'master') order by tuple() settings always_fetch_merged_part=0, old_parts_lifetime=600; -- prefer_fetch_merged_part_*_threshold=0, consider this table as a "slave" -create table 
rmt_slave (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'slave') order by tuple() settings prefer_fetch_merged_part_time_threshold=0, prefer_fetch_merged_part_size_threshold=0; +create table rmt_slave (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'slave') order by tuple() settings prefer_fetch_merged_part_time_threshold=0, prefer_fetch_merged_part_size_threshold=0, old_parts_lifetime=600; insert into rmt_master values (1); diff --git a/tests/queries/0_stateless/03013_json_key_ignore_case.reference b/tests/queries/0_stateless/03013_json_key_ignore_case.reference new file mode 100644 index 00000000000..54683d8fbc5 --- /dev/null +++ b/tests/queries/0_stateless/03013_json_key_ignore_case.reference @@ -0,0 +1,3 @@ +1 77328912 Ben +2 77328913 Jim +3 77328914 Bill diff --git a/tests/queries/0_stateless/03013_json_key_ignore_case.sh b/tests/queries/0_stateless/03013_json_key_ignore_case.sh new file mode 100755 index 00000000000..807e743b22a --- /dev/null +++ b/tests/queries/0_stateless/03013_json_key_ignore_case.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +# NOTE: this sh wrapper is required because of shell_config + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') + +cp "$CURDIR"/data_json/key_ignore_case.json $USER_FILES_PATH/ + +$CLICKHOUSE_CLIENT -q "drop table if exists test_tbl" +$CLICKHOUSE_CLIENT -q "create table test_tbl (id UInt16, reqid UInt32, name String) engine=MergeTree order by id" +$CLICKHOUSE_CLIENT -q "INSERT INTO test_tbl SELECT * FROM file('key_ignore_case.json', 'JSONEachRow') SETTINGS input_format_json_ignore_key_case=true" +$CLICKHOUSE_CLIENT -q "select * from test_tbl" +$CLICKHOUSE_CLIENT -q "drop table test_tbl" \ No newline at end of file diff --git a/tests/queries/0_stateless/03015_parser_shortcut_lexer_errors.reference b/tests/queries/0_stateless/03015_parser_shortcut_lexer_errors.reference new file mode 100644 index 00000000000..f83d884fd78 --- /dev/null +++ b/tests/queries/0_stateless/03015_parser_shortcut_lexer_errors.reference @@ -0,0 +1 @@ +Syntax error diff --git a/tests/queries/0_stateless/03015_parser_shortcut_lexer_errors.sh b/tests/queries/0_stateless/03015_parser_shortcut_lexer_errors.sh new file mode 100755 index 00000000000..762201ed5fc --- /dev/null +++ b/tests/queries/0_stateless/03015_parser_shortcut_lexer_errors.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL --query "SELECT((((((((((SELECT(((((((((SELECT((((((((((SELECT(((((((((SELECT((((((((((SELECT(((((((((SELECT 1+)))))))))))))))))))))))))))))))))))))))))))))))))))))))))'" 2>&1 | grep -o -F 'Syntax error' diff --git a/tests/queries/0_stateless/03071_fix_short_circuit_logic.reference b/tests/queries/0_stateless/03071_fix_short_circuit_logic.reference new file mode 100644 index 00000000000..48aedfc3958 --- /dev/null +++ b/tests/queries/0_stateless/03071_fix_short_circuit_logic.reference @@ -0,0 +1 @@ +2024-01-02 16:54:59 diff --git a/tests/queries/0_stateless/03071_fix_short_circuit_logic.sql b/tests/queries/0_stateless/03071_fix_short_circuit_logic.sql new file mode 100644 index 00000000000..7745bceca0b --- /dev/null +++ b/tests/queries/0_stateless/03071_fix_short_circuit_logic.sql @@ -0,0 +1,62 @@ + + +CREATE FUNCTION IF NOT EXISTS unhexPrefixed AS value -> unhex(substring(value, 3)); +CREATE FUNCTION IF NOT EXISTS hex2bytes AS address -> CAST(unhexPrefixed(address), 'FixedString(20)'); +CREATE FUNCTION IF NOT EXISTS bytes2hex AS address -> concat('0x', lower(hex(address))); + +CREATE TABLE test +( + `transfer_id` String, + `address` FixedString(20), + `value` UInt256, + `block_timestamp` DateTime('UTC'), + `token_address` FixedString(20) +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(block_timestamp) +PRIMARY KEY (address, block_timestamp) +ORDER BY (address, block_timestamp); + +INSERT INTO test SELECT 'token-transfer-0x758f1bbabb160683e1c80ed52dcd24a32b599d40edf1cec91b5f1199c0e392a2-56', hex2bytes('0xd387a6e4e84a6c86bd90c158c6028a58cc8ac459'), 3000000000000000000000, '2024-01-02 16:54:59', 'abc'; + +CREATE TABLE token_data +( + token_address_hex String, + chain String, + is_blacklisted Bool +) +ENGINE = TinyLog; + +INSERT INTO token_data SELECT bytes2hex('abc'), 'zksync', false; + +CREATE DICTIONARY token_data_map +( + token_address_hex String, + chain String, + is_blacklisted Bool +) +PRIMARY KEY token_address_hex, chain +SOURCE(Clickhouse(table token_data)) +LIFETIME(MIN 200 MAX 300) +LAYOUT(COMPLEX_KEY_HASHED_ARRAY()); + +SELECT block_timestamp +FROM +( + SELECT + block_timestamp, + bytes2hex(token_address) AS token_address_hex + FROM + ( + SELECT + transfer_id, + address, + value, + block_timestamp, + token_address, + 'zksync' AS chain + FROM test + ) + WHERE (address = hex2bytes('0xd387a6e4e84a6c86bd90c158c6028a58cc8ac459')) AND (transfer_id NOT LIKE 'gas%') AND (value > 0) AND (dictGetOrDefault(token_data_map, 'is_blacklisted', (token_address_hex, 'zksync'), true)) +) +SETTINGS max_threads = 1, short_circuit_function_evaluation = 'enable', allow_experimental_analyzer = 0; \ No newline at end of file diff --git a/tests/queries/0_stateless/03094_one_thousand_joins.sql b/tests/queries/0_stateless/03094_one_thousand_joins.sql index ea159f0e4c0..1f6bd99df7f 100644 --- a/tests/queries/0_stateless/03094_one_thousand_joins.sql +++ b/tests/queries/0_stateless/03094_one_thousand_joins.sql @@ -1,6 +1,7 @@ -- Tags: no-fasttest, no-tsan, long -- (no-tsan because it has a small maximum stack size and the test would fail with TOO_DEEP_RECURSION) +SET join_algorithm = 'default'; -- for 'full_sorting_merge' the query is 10x slower SET allow_experimental_analyzer = 1; -- old analyzer returns TOO_DEEP_SUBQUERIES -- Bug 33446, marked as 'long' because it still runs around 10 sec diff --git a/tests/queries/0_stateless/03095_window_functions_qualify.reference b/tests/queries/0_stateless/03095_window_functions_qualify.reference index 
c74a212428b..9ffbe64f85e 100644 --- a/tests/queries/0_stateless/03095_window_functions_qualify.reference +++ b/tests/queries/0_stateless/03095_window_functions_qualify.reference @@ -48,8 +48,9 @@ Positions: 2 0 INPUT :: 1 -> count() OVER (PARTITION BY modulo(__table1.number, 3_UInt8)) UInt64 : 1 INPUT : 2 -> count() OVER (PARTITION BY modulo(__table1.number, 3_UInt8)) UInt64 : 2 COLUMN Const(UInt8) -> 4_UInt8 UInt8 : 3 - FUNCTION equals(count() OVER (PARTITION BY modulo(__table1.number, 3_UInt8)) :: 2, 4_UInt8 :: 3) -> equals(count() OVER (PARTITION BY modulo(__table1.number, 3_UInt8)), 4_UInt8) UInt8 : 4 - Positions: 4 0 1 + INPUT :: 3 -> modulo(__table1.number, 3_UInt8) UInt8 : 4 + FUNCTION equals(count() OVER (PARTITION BY modulo(__table1.number, 3_UInt8)) :: 2, 4_UInt8 :: 3) -> equals(count() OVER (PARTITION BY modulo(__table1.number, 3_UInt8)), 4_UInt8) UInt8 : 5 + Positions: 5 0 1 Window (Window step for window \'PARTITION BY modulo(__table1.number, 3_UInt8)\') Header: modulo(__table1.number, 3_UInt8) UInt8 __table1.number UInt64 diff --git a/tests/queries/0_stateless/03135_keeper_client_find_commands.sh b/tests/queries/0_stateless/03135_keeper_client_find_commands.sh index 0f57694028d..43ffdec7346 100755 --- a/tests/queries/0_stateless/03135_keeper_client_find_commands.sh +++ b/tests/queries/0_stateless/03135_keeper_client_find_commands.sh @@ -6,24 +6,24 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) path="/test-keeper-client-$CLICKHOUSE_DATABASE" -$CLICKHOUSE_KEEPER_CLIENT -q "rm $path" >& /dev/null +$CLICKHOUSE_KEEPER_CLIENT -q "rm '$path'" >& /dev/null -$CLICKHOUSE_KEEPER_CLIENT -q "create $path 'foobar'" -$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1 'foobar'" -$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/a 'foobar'" -$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/a/a 'foobar'" -$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/b 'foobar'" -$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/c 'foobar'" -$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/d 'foobar'" -$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/d/a 'foobar'" -$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/d/b 'foobar'" -$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/d/c 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path' 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path/1' 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path/1/a' 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path/1/a/a' 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path/1/b' 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path/1/c' 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path/1/d' 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path/1/d/a' 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path/1/d/b' 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path/1/d/c' 'foobar'" echo 'find_super_nodes' $CLICKHOUSE_KEEPER_CLIENT -q "find_super_nodes 1000000000" -$CLICKHOUSE_KEEPER_CLIENT -q "find_super_nodes 3 $path" | sort +$CLICKHOUSE_KEEPER_CLIENT -q "find_super_nodes 3 '$path'" | sort echo 'find_big_family' -$CLICKHOUSE_KEEPER_CLIENT -q "find_big_family $path 3" +$CLICKHOUSE_KEEPER_CLIENT -q "find_big_family '$path' 3" -$CLICKHOUSE_KEEPER_CLIENT -q "rmr $path" +$CLICKHOUSE_KEEPER_CLIENT -q "rmr '$path'" diff --git a/tests/queries/0_stateless/03156_group_concat.reference b/tests/queries/0_stateless/03156_group_concat.reference index 75b347be0c4..c1ab35e96c0 100644 --- a/tests/queries/0_stateless/03156_group_concat.reference +++ b/tests/queries/0_stateless/03156_group_concat.reference @@ -4,6 +4,10 @@ 95123 abcamakson95 
[1,2,3][993,986,979,972][] +[1,2,3] +abcamakson95 +95123 +95\n123 95,123 abc,a,makson95 [1,2,3],[993,986,979,972] @@ -12,3 +16,4 @@ abc,a,makson95 abc,a,makson95,abc,a,makson95,abc,a,makson95 [1,2,3][993,986,979,972][][1,2,3][993,986,979,972][][1,2,3][993,986,979,972][] 488890 +488890 diff --git a/tests/queries/0_stateless/03156_group_concat.sql b/tests/queries/0_stateless/03156_group_concat.sql index c14fde8943a..0d561c69f0a 100644 --- a/tests/queries/0_stateless/03156_group_concat.sql +++ b/tests/queries/0_stateless/03156_group_concat.sql @@ -16,6 +16,11 @@ SELECT groupConcat(p_int) FROM test_groupConcat; SELECT groupConcat(p_string) FROM test_groupConcat; SELECT groupConcat(p_array) FROM test_groupConcat; +SELECT groupConcat('', 1)(p_array) FROM test_groupConcat; +SELECT groupConcat('', 3)(p_string) FROM test_groupConcat; +SELECT groupConcat('', 2)(p_int) FROM test_groupConcat; +SELECT groupConcat('\n', 3)(p_int) FROM test_groupConcat; + SELECT groupConcat(',')(p_int) FROM test_groupConcat; SELECT groupConcat(',')(p_string) FROM test_groupConcat; SELECT groupConcat(',', 2)(p_array) FROM test_groupConcat; @@ -38,3 +43,15 @@ SELECT groupConcat(',', 3, 3)(number) FROM numbers(10); -- { serverError TOO_MAN SELECT length(groupConcat(number)) FROM numbers(100000); DROP TABLE IF EXISTS test_groupConcat; + +CREATE TABLE test_groupConcat +( + id UInt64, + p_int Int32, +) ENGINE = MergeTree ORDER BY id; + +INSERT INTO test_groupConcat SELECT number, number FROM numbers(100000) SETTINGS min_insert_block_size_rows = 2000; + +SELECT length(groupConcat(p_int)) FROM test_groupConcat; + +DROP TABLE IF EXISTS test_groupConcat; diff --git a/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.reference b/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.reference new file mode 100644 index 00000000000..1526555f6c8 --- /dev/null +++ b/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.reference @@ -0,0 +1,8 @@ +selects_with_pk_usage +0 +selects_with_pk_usage +0 +selects_with_pk_usage +1 +selects_with_pk_usage +1 diff --git a/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.sh b/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.sh new file mode 100755 index 00000000000..29d4c877909 --- /dev/null +++ b/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash + +# Tests profile event "SelectedMarksByPrimaryKeyUsage" + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +table_id="$(random_str 10)" + +$CLICKHOUSE_CLIENT -q " + DROP TABLE IF EXISTS table_$table_id;" + +$CLICKHOUSE_CLIENT -q " + CREATE TABLE table_$table_id ( + pk Int64, + col1 Int64, + col2 Int64, + INDEX idx(col2) TYPE minmax + ) ENGINE = MergeTree ORDER BY pk PARTITION BY (pk % 2);"; + +$CLICKHOUSE_CLIENT -q " + ALTER TABLE table_$table_id ADD PROJECTION proj (SELECT * ORDER BY col1);" + +# Populate two partitions with 50k rows each. Each partition has >1 granules. +# We want SelectQueriesWithPrimaryKeyUsage to increase by +1 in each query, not by +1 per partition or by +1 per granule. 
+$CLICKHOUSE_CLIENT -q " + INSERT INTO table_$table_id SELECT number, number, number FROM numbers(100000);" + +# Run SELECTs + +# -- No filter +query_id="$(random_str 10)" +$CLICKHOUSE_CLIENT --query_id "$query_id" -q " + SELECT count(*) FROM table_$table_id FORMAT Null;" +$CLICKHOUSE_CLIENT -mn -q " + SYSTEM FLUSH LOGS; + SELECT + ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] AS selects_with_pk_usage + FROM + system.query_log + WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query_id = '$query_id' + FORMAT TSVWithNames; +" + +# -- Filter on non-PK column. However, it has a minmax-index defined. We expect the profile event to not increase. +query_id="$(random_str 10)" +$CLICKHOUSE_CLIENT --query_id "$query_id" -q " + SELECT count(*) FROM table_$table_id WHERE col2 >= 50000 FORMAT Null;" +$CLICKHOUSE_CLIENT -mn -q " + SYSTEM FLUSH LOGS; + SELECT + ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] AS selects_with_pk_usage + FROM + system.query_log + WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query_id = '$query_id' + FORMAT TSVWithNames; +" + +# Filter on PK +query_id="$(random_str 10)" +$CLICKHOUSE_CLIENT --query_id "$query_id" -q " + SELECT count(*) FROM table_$table_id WHERE pk >= 50000 FORMAT Null;" +$CLICKHOUSE_CLIENT -mn -q " + SYSTEM FLUSH LOGS; + SELECT + ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] AS selects_with_pk_usage + FROM + system.query_log + WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query_id = '$query_id' + FORMAT TSVWithNames; +" + +# Filter on PK in projection +query_id="$(random_str 10)" +$CLICKHOUSE_CLIENT --query_id "$query_id" -q " + SELECT count(*) FROM table_$table_id WHERE col1 >= 50000 FORMAT Null;" +$CLICKHOUSE_CLIENT -mn -q " + SYSTEM FLUSH LOGS; + SELECT + ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] AS selects_with_pk_usage + FROM + system.query_log + WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query_id = '$query_id' + FORMAT TSVWithNames; +" + +$CLICKHOUSE_CLIENT -q " + DROP TABLE table_$table_id;" diff --git a/tests/queries/0_stateless/03167_base64_url_functions.sql b/tests/queries/0_stateless/03167_base64_url_functions.sql index 674f1ae498b..6c394ba6c3a 100644 --- a/tests/queries/0_stateless/03167_base64_url_functions.sql +++ b/tests/queries/0_stateless/03167_base64_url_functions.sql @@ -2,35 +2,35 @@ -- no-fasttest because aklomp-base64 library is required -- incorrect number of arguments -SELECT base64UrlEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT base64UrlDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT tryBase64UrlDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT base64UrlEncode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT base64UrlDecode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT tryBase64UrlDecode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT base64URLEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT base64URLDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tryBase64URLDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT base64URLEncode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT base64URLDecode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tryBase64URLDecode('foo', 'excess 
argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -- test with valid inputs -SELECT 'https://clickhouse.com' AS original, base64UrlEncode(original) AS encoded, base64UrlDecode(encoded), tryBase64UrlDecode(encoded); -SELECT '12?' AS original, base64UrlEncode(original) AS encoded, base64UrlDecode(encoded), tryBase64UrlDecode(encoded); -SELECT 'https://www.google.com/search?q=clickhouse+base64+decode&sca_esv=739f8bb380e4c7ed&ei=TfRiZqCDIrmnwPAP2KLRkA8&ved=0ahUKEwjg3ZHitsmGAxW5ExAIHVhRFPIQ4dUDCBA&uact=5&oq=clickhouse+base64+decode' AS original, base64UrlEncode(original) AS encoded, base64UrlDecode(encoded), tryBase64UrlDecode(encoded); +SELECT 'https://clickhouse.com' AS original, base64URLEncode(original) AS encoded, base64URLDecode(encoded), tryBase64URLDecode(encoded); +SELECT '12?' AS original, base64URLEncode(original) AS encoded, base64URLDecode(encoded), tryBase64URLDecode(encoded); +SELECT 'https://www.google.com/search?q=clickhouse+base64+decode&sca_esv=739f8bb380e4c7ed&ei=TfRiZqCDIrmnwPAP2KLRkA8&ved=0ahUKEwjg3ZHitsmGAxW5ExAIHVhRFPIQ4dUDCBA&uact=5&oq=clickhouse+base64+decode' AS original, base64URLEncode(original) AS encoded, base64URLDecode(encoded), tryBase64URLDecode(encoded); -- encoded value has no padding -SELECT 'aHR0cHM6Ly9jbGlj' AS encoded, base64UrlDecode(encoded), tryBase64UrlDecode(encoded); +SELECT 'aHR0cHM6Ly9jbGlj' AS encoded, base64URLDecode(encoded), tryBase64URLDecode(encoded); -- encoded value has one-byte padding -SELECT 'aHR0cHM6Ly9jbGlja2g' AS encoded, base64UrlDecode(encoded), tryBase64UrlDecode(encoded); +SELECT 'aHR0cHM6Ly9jbGlja2g' AS encoded, base64URLDecode(encoded), tryBase64URLDecode(encoded); -- encoded value has two-bytes padding -SELECT 'aHR0cHM6Ly9jbGljaw' AS encoded, base64UrlDecode(encoded), tryBase64UrlDecode(encoded); +SELECT 'aHR0cHM6Ly9jbGljaw' AS encoded, base64URLDecode(encoded), tryBase64URLDecode(encoded); -- test with invalid inputs -SELECT base64UrlDecode('https://clickhouse.com'); -- { serverError INCORRECT_DATA } -SELECT tryBase64UrlDecode('https://clickhouse.com'); -SELECT base64UrlDecode('12?'); -- { serverError INCORRECT_DATA } -SELECT tryBase64UrlDecode('12?'); -SELECT base64UrlDecode('aHR0cHM6Ly9jbGlja'); -- { serverError INCORRECT_DATA } -SELECT tryBase64UrlDecode('aHR0cHM6Ly9jbGlja'); +SELECT base64URLDecode('https://clickhouse.com'); -- { serverError INCORRECT_DATA } +SELECT tryBase64URLDecode('https://clickhouse.com'); +SELECT base64URLDecode('12?'); -- { serverError INCORRECT_DATA } +SELECT tryBase64URLDecode('12?'); +SELECT base64URLDecode('aHR0cHM6Ly9jbGlja'); -- { serverError INCORRECT_DATA } +SELECT tryBase64URLDecode('aHR0cHM6Ly9jbGlja'); -- test FixedString argument -SELECT toFixedString('https://clickhouse.com', 22) AS original, base64UrlEncode(original) AS encoded, base64UrlDecode(encoded), tryBase64UrlDecode(encoded); +SELECT toFixedString('https://clickhouse.com', 22) AS original, base64URLEncode(original) AS encoded, base64URLDecode(encoded), tryBase64URLDecode(encoded); diff --git a/tests/queries/0_stateless/03167_parametrized_view_with_cte.reference b/tests/queries/0_stateless/03167_parametrized_view_with_cte.reference new file mode 100644 index 00000000000..951910bbe74 --- /dev/null +++ b/tests/queries/0_stateless/03167_parametrized_view_with_cte.reference @@ -0,0 +1,5 @@ +OK +123 +123 +123 +123 diff --git a/tests/queries/0_stateless/03167_parametrized_view_with_cte.sql b/tests/queries/0_stateless/03167_parametrized_view_with_cte.sql new file mode 100644 index 00000000000..1ac5540047a --- /dev/null 
+++ b/tests/queries/0_stateless/03167_parametrized_view_with_cte.sql @@ -0,0 +1,7 @@ +SET allow_experimental_analyzer=1; +CREATE OR REPLACE VIEW param_test AS SELECT {test_str:String} as s_result; +WITH 'OK' AS s SELECT * FROM param_test(test_str=s); +WITH (SELECT 123) AS s SELECT * FROM param_test(test_str=s); +WITH (SELECT 100 + 20 + 3) AS s SELECT * FROM param_test(test_str=s); +WITH (SELECT number FROM numbers(123, 1)) AS s SELECT * FROM param_test(test_str=s); +WITH CAST(123, 'String') AS s SELECT * FROM param_test(test_str=s); diff --git a/tests/queries/0_stateless/03168_inconsistent_ast_formatting.reference b/tests/queries/0_stateless/03168_inconsistent_ast_formatting.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03168_inconsistent_ast_formatting.sql b/tests/queries/0_stateless/03168_inconsistent_ast_formatting.sql new file mode 100644 index 00000000000..5333ea29ce7 --- /dev/null +++ b/tests/queries/0_stateless/03168_inconsistent_ast_formatting.sql @@ -0,0 +1,7 @@ +create table a (x `Null`); -- { clientError SYNTAX_ERROR } +create table a (x f(`Null`)); -- { clientError SYNTAX_ERROR } +create table a (x Enum8(f(`Null`, 'World', 2))); -- { clientError SYNTAX_ERROR } +create table a (`value2` Enum8('Hello' = 1, equals(`Null`, 'World', 2), '!' = 3)); -- { clientError SYNTAX_ERROR } + +create table a (x Int8) engine Memory; +create table b empty as a; diff --git a/tests/queries/0_stateless/03168_query_log_privileges_not_empty.reference b/tests/queries/0_stateless/03168_query_log_privileges_not_empty.reference new file mode 100644 index 00000000000..e3ac97f9945 --- /dev/null +++ b/tests/queries/0_stateless/03168_query_log_privileges_not_empty.reference @@ -0,0 +1,5 @@ +1 +3168 8613 +[] ['SELECT(a, b) ON default.d_03168_query_log'] +[] [] +['SELECT(a, b) ON default.d_03168_query_log'] [] diff --git a/tests/queries/0_stateless/03168_query_log_privileges_not_empty.sh b/tests/queries/0_stateless/03168_query_log_privileges_not_empty.sh new file mode 100755 index 00000000000..9abc635a874 --- /dev/null +++ b/tests/queries/0_stateless/03168_query_log_privileges_not_empty.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +user_name="u_03168_query_log" +table_name="default.d_03168_query_log" +test_query="select a, b from ${table_name}" + +${CLICKHOUSE_CLIENT_BINARY} --query "drop user if exists ${user_name}" +${CLICKHOUSE_CLIENT_BINARY} --query "create user ${user_name}" +${CLICKHOUSE_CLIENT_BINARY} --query "drop table if exists ${table_name}" +${CLICKHOUSE_CLIENT_BINARY} --query "create table ${table_name} (a UInt64, b UInt64) order by a" + +${CLICKHOUSE_CLIENT_BINARY} --query "insert into table ${table_name} values (3168, 8613)" + +error="$(${CLICKHOUSE_CLIENT_BINARY} --user ${user_name} --query "${test_query}" 2>&1 >/dev/null)" +echo "${error}" | grep -Fc "ACCESS_DENIED" + +${CLICKHOUSE_CLIENT_BINARY} --query "grant select(a, b) on ${table_name} to ${user_name}" + +${CLICKHOUSE_CLIENT_BINARY} --user ${user_name} --query "${test_query}" + +${CLICKHOUSE_CLIENT_BINARY} --query "system flush logs" +${CLICKHOUSE_CLIENT_BINARY} --query "select used_privileges, missing_privileges from system.query_log where query = '${test_query}' and type = 'ExceptionBeforeStart' and current_database = currentDatabase() order by event_time desc limit 1" +${CLICKHOUSE_CLIENT_BINARY} --query "select used_privileges, missing_privileges from system.query_log where query = '${test_query}' and type = 'QueryStart' and current_database = currentDatabase() order by event_time desc limit 1" +${CLICKHOUSE_CLIENT_BINARY} --query "select used_privileges, missing_privileges from system.query_log where query = '${test_query}' and type = 'QueryFinish' and current_database = currentDatabase() order by event_time desc limit 1" + +${CLICKHOUSE_CLIENT_BINARY} --query "drop table ${table_name}" +${CLICKHOUSE_CLIENT_BINARY} --query "drop user ${user_name}" diff --git a/tests/queries/0_stateless/03169_cache_complex_dict_short_circuit_bug.sql b/tests/queries/0_stateless/03169_cache_complex_dict_short_circuit_bug.sql index 8463d13d251..f91aaf39081 100644 --- a/tests/queries/0_stateless/03169_cache_complex_dict_short_circuit_bug.sql +++ b/tests/queries/0_stateless/03169_cache_complex_dict_short_circuit_bug.sql @@ -27,5 +27,5 @@ LAYOUT(COMPLEX_KEY_CACHE(SIZE_IN_CELLS 10)); SELECT dictGetOrDefault('cache_dictionary_complex_key_simple_attributes_short_circuit', 'value_first', (number, concat(toString(number))), toString(materialize('default'))) AS value_first FROM system.numbers LIMIT 20 FORMAT Null; SELECT dictGetOrDefault('cache_dictionary_complex_key_simple_attributes_short_circuit', 'value_first', (number, concat(toString(number))), toString(materialize('default'))) AS value_first FROM system.numbers LIMIT 20 FORMAT Null; -DROP TABLE IF EXISTS complex_key_simple_attributes_source_short_circuit_table; DROP DICTIONARY IF EXISTS cache_dictionary_complex_key_simple_attributes_short_circuit; +DROP TABLE IF EXISTS complex_key_simple_attributes_source_short_circuit_table; diff --git a/tests/queries/0_stateless/03171_indexing_by_hilbert_curve.reference b/tests/queries/0_stateless/03171_indexing_by_hilbert_curve.reference new file mode 100644 index 00000000000..6e8a5df9145 --- /dev/null +++ b/tests/queries/0_stateless/03171_indexing_by_hilbert_curve.reference @@ -0,0 +1,9 @@ +121 +121 +32 +21 +10 +32 +22 +11 +1 diff --git a/tests/queries/0_stateless/03171_indexing_by_hilbert_curve.sql b/tests/queries/0_stateless/03171_indexing_by_hilbert_curve.sql new file mode 100644 index 00000000000..2d566e52c94 --- /dev/null +++ b/tests/queries/0_stateless/03171_indexing_by_hilbert_curve.sql @@ -0,0 +1,35 @@ +DROP TABLE IF EXISTS 
test_hilbert_encode_hilbert_encode; + +CREATE TABLE test_hilbert_encode (x UInt32, y UInt32) ENGINE = MergeTree ORDER BY hilbertEncode(x, y) SETTINGS index_granularity = 8192, index_granularity_bytes = '1Mi'; +INSERT INTO test_hilbert_encode SELECT number DIV 1024, number % 1024 FROM numbers(1048576); + +SET max_rows_to_read = 8192, force_primary_key = 1, analyze_index_with_space_filling_curves = 1; +SELECT count() FROM test_hilbert_encode WHERE x >= 10 AND x <= 20 AND y >= 20 AND y <= 30; + +SET max_rows_to_read = 8192, force_primary_key = 1, analyze_index_with_space_filling_curves = 0; +SELECT count() FROM test_hilbert_encode WHERE x >= 10 AND x <= 20 AND y >= 20 AND y <= 30; -- { serverError 277 } + +DROP TABLE test_hilbert_encode; + +-- The same, but with more precise index + +CREATE TABLE test_hilbert_encode (x UInt32, y UInt32) ENGINE = MergeTree ORDER BY hilbertEncode(x, y) SETTINGS index_granularity = 1; +SET max_rows_to_read = 0; +INSERT INTO test_hilbert_encode SELECT number DIV 32, number % 32 FROM numbers(1024); + +SET max_rows_to_read = 200, force_primary_key = 1, analyze_index_with_space_filling_curves = 1; +SELECT count() FROM test_hilbert_encode WHERE x >= 10 AND x <= 20 AND y >= 20 AND y <= 30; + +-- Various other conditions + +SELECT count() FROM test_hilbert_encode WHERE x = 10 SETTINGS max_rows_to_read = 49; +SELECT count() FROM test_hilbert_encode WHERE x = 10 AND y > 10 SETTINGS max_rows_to_read = 33; +SELECT count() FROM test_hilbert_encode WHERE x = 10 AND y < 10 SETTINGS max_rows_to_read = 15; + +SELECT count() FROM test_hilbert_encode WHERE y = 10 SETTINGS max_rows_to_read = 50; +SELECT count() FROM test_hilbert_encode WHERE x >= 10 AND y = 10 SETTINGS max_rows_to_read = 35; +SELECT count() FROM test_hilbert_encode WHERE y = 10 AND x <= 10 SETTINGS max_rows_to_read = 17; + +SELECT count() FROM test_hilbert_encode PREWHERE x >= 10 WHERE x < 11 AND y = 10 SETTINGS max_rows_to_read = 2; + +DROP TABLE test_hilbert_encode; diff --git a/tests/queries/0_stateless/03172_error_log_table_not_empty.reference b/tests/queries/0_stateless/03172_error_log_table_not_empty.reference new file mode 100644 index 00000000000..a9e2f17562a --- /dev/null +++ b/tests/queries/0_stateless/03172_error_log_table_not_empty.reference @@ -0,0 +1,6 @@ +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/03172_error_log_table_not_empty.sh b/tests/queries/0_stateless/03172_error_log_table_not_empty.sh new file mode 100755 index 00000000000..8d74ebe1039 --- /dev/null +++ b/tests/queries/0_stateless/03172_error_log_table_not_empty.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +# Get the previous number of errors for 111, 222 and 333 +errors_111=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 111") +errors_222=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 222") +errors_333=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 333") + +# Throw three random errors: 111, 222 and 333 and wait for more than collect_interval_milliseconds to ensure system.error_log is flushed +$CLICKHOUSE_CLIENT -mn -q " +SELECT throwIf(true, 'error_log', toInt16(111)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 111 } +SELECT throwIf(true, 'error_log', toInt16(222)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 222 } +SELECT throwIf(true, 'error_log', toInt16(333)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 333 } +SELECT sleep(2) format NULL; +SYSTEM FLUSH LOGS; +" + +# Check that the three random errors are propagated +$CLICKHOUSE_CLIENT -mn -q " +SELECT sum(value) > $errors_111 FROM system.error_log WHERE code = 111; +SELECT sum(value) > $errors_222 FROM system.error_log WHERE code = 222; +SELECT sum(value) > $errors_333 FROM system.error_log WHERE code = 333; +" + +# Ensure that if we throw them again, they're still propagated +$CLICKHOUSE_CLIENT -mn -q " +SELECT throwIf(true, 'error_log', toInt16(111)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 111 } +SELECT throwIf(true, 'error_log', toInt16(222)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 222 } +SELECT throwIf(true, 'error_log', toInt16(333)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 333 } +SELECT sleep(2) format NULL; +SYSTEM FLUSH LOGS; +" + +$CLICKHOUSE_CLIENT -mn -q " +SELECT sum(value) > $(($errors_111+1)) FROM system.error_log WHERE code = 111; +SELECT sum(value) > $(($errors_222+1)) FROM system.error_log WHERE code = 222; +SELECT sum(value) > $(($errors_333+1)) FROM system.error_log WHERE code = 333; +" \ No newline at end of file diff --git a/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.reference b/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.sql b/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.sql new file mode 100644 index 00000000000..0cadd4f5cee --- /dev/null +++ b/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.sql @@ -0,0 +1,77 @@ +-- Tags: atomic-database + +DROP TABLE IF EXISTS test; +CREATE TABLE test (id UInt64, value String) ENGINE=MergeTree ORDER BY id; +INSERT INTO test SELECT number, 'str_' || toString(number) FROM numbers(10); +DROP DICTIONARY IF EXISTS test_dict; +CREATE DICTIONARY test_dict +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE test)) +LAYOUT(FLAT()) +LIFETIME(MIN 0 MAX 1000); +DROP TABLE IF EXISTS view_source; +CREATE TABLE view_source (id UInt64) ENGINE=MergeTree ORDER BY id; +INSERT INTO view_source SELECT * FROM numbers(5); +DROP VIEW IF EXISTS view; +CREATE VIEW view AS SELECT id, dictGet('test_dict', 'value', id) as value FROM view_source; + +CREATE OR REPLACE DICTIONARY test_dict +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE view)) +LAYOUT(FLAT()) +LIFETIME(MIN 0 MAX 1000); -- {serverError INFINITE_LOOP} + +REPLACE 
DICTIONARY test_dict +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE view)) +LAYOUT(FLAT()) +LIFETIME(MIN 0 MAX 1000); -- {serverError INFINITE_LOOP} + + +DROP DICTIONARY IF EXISTS test_dict_2; +CREATE DICTIONARY test_dict_2 +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE view)) +LAYOUT(FLAT()) +LIFETIME(MIN 0 MAX 1000); + +EXCHANGE DICTIONARIES test_dict AND test_dict_2; -- {serverError INFINITE_LOOP} + +DROP DICTIONARY test_dict_2; + +CREATE OR REPLACE DICTIONARY test_dict_2 +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE view)) +LAYOUT(FLAT()) +LIFETIME(MIN 0 MAX 1000); + +EXCHANGE DICTIONARIES test_dict AND test_dict_2; -- {serverError INFINITE_LOOP} + +DROP DICTIONARY test_dict; +RENAME DICTIONARY test_dict_2 to test_dict; -- {serverError INFINITE_LOOP} + +DROP DICTIONARY test_dict_2; +DROP VIEW view; +DROP TABLE test; +DROP TABLE view_source; + diff --git a/tests/queries/0_stateless/03173_distinct_combinator_alignment.reference b/tests/queries/0_stateless/03173_distinct_combinator_alignment.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03173_distinct_combinator_alignment.sql b/tests/queries/0_stateless/03173_distinct_combinator_alignment.sql new file mode 100644 index 00000000000..4a066be5086 --- /dev/null +++ b/tests/queries/0_stateless/03173_distinct_combinator_alignment.sql @@ -0,0 +1 @@ +SELECT toTypeName(topKDistinctState(toNullable(10))(toString(number)) IGNORE NULLS) FROM numbers(100) GROUP BY tuple((map((materialize(toNullable(1)), 2), 4, (3, 4), 5), 3)), map((1, 2), 4, (3, 4), toNullable(5)) WITH CUBE WITH TOTALS FORMAT Null diff --git a/tests/queries/0_stateless/03173_forbid_qualify.reference b/tests/queries/0_stateless/03173_forbid_qualify.reference new file mode 100644 index 00000000000..c2f595d8c4b --- /dev/null +++ b/tests/queries/0_stateless/03173_forbid_qualify.reference @@ -0,0 +1,3 @@ +100 +49 +100 diff --git a/tests/queries/0_stateless/03173_forbid_qualify.sql b/tests/queries/0_stateless/03173_forbid_qualify.sql new file mode 100644 index 00000000000..d8cb2bad2ea --- /dev/null +++ b/tests/queries/0_stateless/03173_forbid_qualify.sql @@ -0,0 +1,11 @@ +drop table if exists test_qualify; +create table test_qualify (number Int64) ENGINE = MergeTree ORDER BY (number); + +insert into test_qualify SELECT * FROM numbers(100); + +select count() from test_qualify; -- 100 +select * from test_qualify qualify row_number() over (order by number) = 50 SETTINGS allow_experimental_analyzer = 1; -- 49 +select * from test_qualify qualify row_number() over (order by number) = 50 SETTINGS allow_experimental_analyzer = 0; -- { serverError NOT_IMPLEMENTED } + +delete from test_qualify where number in (select number from test_qualify qualify row_number() over (order by number) = 50); -- { serverError UNFINISHED } +select count() from test_qualify; -- 100 diff --git a/tests/queries/0_stateless/03174_merge_join_bug.reference b/tests/queries/0_stateless/03174_merge_join_bug.reference new file mode 100644 index 00000000000..af98bcd6397 --- /dev/null +++ b/tests/queries/0_stateless/03174_merge_join_bug.reference @@ -0,0 +1,10 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 diff --git a/tests/queries/0_stateless/03174_merge_join_bug.sql b/tests/queries/0_stateless/03174_merge_join_bug.sql new file mode 100644 index 00000000000..ab4cb6cd4a9 --- /dev/null +++ b/tests/queries/0_stateless/03174_merge_join_bug.sql @@ -0,0 +1,10 @@ +-- Tags: no-random-settings + 
+SET allow_experimental_analyzer=1, join_algorithm = 'full_sorting_merge'; +CREATE TABLE xxxx_yyy (key UInt32, key_b ALIAS key) ENGINE=MergeTree() ORDER BY key SETTINGS ratio_of_defaults_for_sparse_serialization=0.0; +INSERT INTO xxxx_yyy SELECT number FROM numbers(10); + +SELECT * +FROM xxxx_yyy AS a +INNER JOIN xxxx_yyy AS b ON a.key = b.key_b +ORDER BY a.key; diff --git a/tests/queries/0_stateless/03174_split_parts_ranges_into_intersecting_and_non_intersecting_final_and_read-in-order_bug.reference b/tests/queries/0_stateless/03174_split_parts_ranges_into_intersecting_and_non_intersecting_final_and_read-in-order_bug.reference new file mode 100644 index 00000000000..9c849a44af7 --- /dev/null +++ b/tests/queries/0_stateless/03174_split_parts_ranges_into_intersecting_and_non_intersecting_final_and_read-in-order_bug.reference @@ -0,0 +1,116 @@ +2000-01-01 00:00:00 3732436800 3732436800 0 +2000-01-02 00:00:00 11197396800 11197396800 0 +2000-01-03 00:00:00 18662356800 18662356800 0 +2000-01-04 00:00:00 26127316800 26127316800 0 +2000-01-05 00:00:00 33592276800 33592276800 0 +2000-01-06 00:00:00 41057236800 41057236800 0 +2000-01-07 00:00:00 48522196800 48522196800 0 +2000-01-08 00:00:00 55987156800 55987156800 0 +2000-01-09 00:00:00 63452116800 63452116800 0 +2000-01-10 00:00:00 70917076800 70917076800 0 +2000-01-11 00:00:00 78382036800 78382036800 0 +2000-01-12 00:00:00 85846996800 85846996800 0 +2000-01-13 00:00:00 93311956800 93311956800 0 +2000-01-14 00:00:00 100776916800 100776916800 0 +2000-01-15 00:00:00 108241876800 108241876800 0 +2000-01-16 00:00:00 115706836800 115706836800 0 +2000-01-17 00:00:00 123171796800 123171796800 0 +2000-01-18 00:00:00 130636756800 130636756800 0 +2000-01-19 00:00:00 138101716800 138101716800 0 +2000-01-20 00:00:00 145566676800 145566676800 0 +2000-01-21 00:00:00 153031636800 153031636800 0 +2000-01-22 00:00:00 160496596800 160496596800 0 +2000-01-23 00:00:00 167961556800 167961556800 0 +2000-01-24 00:00:00 175426516800 175426516800 0 +2000-01-25 00:00:00 182891476800 182891476800 0 +2000-01-26 00:00:00 190356436800 190356436800 0 +2000-01-27 00:00:00 197821396800 197821396800 0 +2000-01-28 00:00:00 205286356800 205286356800 0 +2000-01-29 00:00:00 212751316800 212751316800 0 +2000-01-30 00:00:00 220216276800 220216276800 0 +2000-01-31 00:00:00 227681236800 227681236800 0 +2000-02-01 00:00:00 235146196800 235146196800 0 +2000-02-02 00:00:00 242611156800 242611156800 0 +2000-02-03 00:00:00 250076116800 250076116800 0 +2000-02-04 00:00:00 257541076800 257541076800 0 +2000-02-05 00:00:00 265006036800 265006036800 0 +2000-02-06 00:00:00 272470996800 272470996800 0 +2000-02-07 00:00:00 279935956800 279935956800 0 +2000-02-08 00:00:00 287400916800 287400916800 0 +2000-02-09 00:00:00 294865876800 294865876800 0 +2000-02-10 00:00:00 302330836800 302330836800 0 +2000-02-11 00:00:00 309795796800 309795796800 0 +2000-02-12 00:00:00 317260756800 317260756800 0 +2000-02-13 00:00:00 324725716800 324725716800 0 +2000-02-14 00:00:00 332190676800 332190676800 0 +2000-02-15 00:00:00 339655636800 339655636800 0 +2000-02-16 00:00:00 347120596800 347120596800 0 +2000-02-17 00:00:00 354585556800 354585556800 0 +2000-02-18 00:00:00 362050516800 362050516800 0 +2000-02-19 00:00:00 369515476800 369515476800 0 +2000-02-20 00:00:00 376980436800 376980436800 0 +2000-02-21 00:00:00 384445396800 384445396800 0 +2000-02-22 00:00:00 391910356800 391910356800 0 +2000-02-23 00:00:00 399375316800 399375316800 0 +2000-02-24 00:00:00 406840276800 406840276800 0 +2000-02-25 00:00:00 414305236800 
414305236800 0 +2000-02-26 00:00:00 421770196800 421770196800 0 +2000-02-27 00:00:00 429235156800 429235156800 0 +2000-02-28 00:00:00 436700116800 436700116800 0 +2000-02-29 00:00:00 444165076800 444165076800 0 +2000-03-01 00:00:00 451630036800 451630036800 0 +2000-03-02 00:00:00 459094996800 459094996800 0 +2000-03-03 00:00:00 466559956800 466559956800 0 +2000-03-04 00:00:00 474024916800 474024916800 0 +2000-03-05 00:00:00 481489876800 481489876800 0 +2000-03-06 00:00:00 488954836800 488954836800 0 +2000-03-07 00:00:00 496419796800 496419796800 0 +2000-03-08 00:00:00 503884756800 503884756800 0 +2000-03-09 00:00:00 511349716800 511349716800 0 +2000-03-10 00:00:00 518814676800 518814676800 0 +2000-03-11 00:00:00 526279636800 526279636800 0 +2000-03-12 00:00:00 533744596800 533744596800 0 +2000-03-13 00:00:00 541209556800 541209556800 0 +2000-03-14 00:00:00 548674516800 548674516800 0 +2000-03-15 00:00:00 556139476800 556139476800 0 +2000-03-16 00:00:00 563604436800 563604436800 0 +2000-03-17 00:00:00 571069396800 571069396800 0 +2000-03-18 00:00:00 578534356800 578534356800 0 +2000-03-19 00:00:00 585999316800 585999316800 0 +2000-03-20 00:00:00 593464276800 593464276800 0 +2000-03-21 00:00:00 600929236800 600929236800 0 +2000-03-22 00:00:00 608394196800 608394196800 0 +2000-03-23 00:00:00 615859156800 615859156800 0 +2000-03-24 00:00:00 623324116800 623324116800 0 +2000-03-25 00:00:00 630789076800 630789076800 0 +2000-03-26 00:00:00 638254036800 638254036800 0 +2000-03-27 00:00:00 645718996800 645718996800 0 +2000-03-28 00:00:00 653183956800 653183956800 0 +2000-03-29 00:00:00 660648916800 660648916800 0 +2000-03-30 00:00:00 668113876800 668113876800 0 +2000-03-31 00:00:00 675578836800 675578836800 0 +2000-04-01 00:00:00 683043796800 683043796800 0 +2000-04-02 00:00:00 690508756800 690508756800 0 +2000-04-03 00:00:00 697973716800 697973716800 0 +2000-04-04 00:00:00 705438676800 705438676800 0 +2000-04-05 00:00:00 712903636800 712903636800 0 +2000-04-06 00:00:00 720368596800 720368596800 0 +2000-04-07 00:00:00 727833556800 727833556800 0 +2000-04-08 00:00:00 735298516800 735298516800 0 +2000-04-09 00:00:00 742763476800 742763476800 0 +2000-04-10 00:00:00 750228436800 750228436800 0 +2000-04-11 00:00:00 757693396800 757693396800 0 +2000-04-12 00:00:00 765158356800 765158356800 0 +2000-04-13 00:00:00 772623316800 772623316800 0 +2000-04-14 00:00:00 780088276800 780088276800 0 +2000-04-15 00:00:00 787553236800 787553236800 0 +2000-04-16 00:00:00 795018196800 795018196800 0 +2000-04-17 00:00:00 802483156800 802483156800 0 +2000-04-18 00:00:00 809948116800 809948116800 0 +2000-04-19 00:00:00 817413076800 817413076800 0 +2000-04-20 00:00:00 824878036800 824878036800 0 +2000-04-21 00:00:00 832342996800 832342996800 0 +2000-04-22 00:00:00 839807956800 839807956800 0 +2000-04-23 00:00:00 847272916800 847272916800 0 +2000-04-24 00:00:00 854737876800 854737876800 0 +2000-04-25 00:00:00 637951968000 862202836800 224250868800 diff --git a/tests/queries/0_stateless/03174_split_parts_ranges_into_intersecting_and_non_intersecting_final_and_read-in-order_bug.sql b/tests/queries/0_stateless/03174_split_parts_ranges_into_intersecting_and_non_intersecting_final_and_read-in-order_bug.sql new file mode 100644 index 00000000000..c8da71b7f4d --- /dev/null +++ b/tests/queries/0_stateless/03174_split_parts_ranges_into_intersecting_and_non_intersecting_final_and_read-in-order_bug.sql @@ -0,0 +1,12 @@ +-- Tags: no-tsan, no-asan, no-msan, no-fasttest +-- Test is slow +create table tab (x DateTime('UTC'), y UInt32, v 
Int32) engine = ReplacingMergeTree(v) order by x; +insert into tab select toDateTime('2000-01-01', 'UTC') + number, number, 1 from numbers(1e7); +optimize table tab final; + +WITH (60 * 60) * 24 AS d +select toStartOfDay(x) as k, sum(y) as v, + (z + d) * (z + d - 1) / 2 - (toUInt64(k - toDateTime('2000-01-01', 'UTC')) as z) * (z - 1) / 2 as est, + est - v as delta +from tab final group by k order by k +settings max_threads=8, optimize_aggregation_in_order=1, split_parts_ranges_into_intersecting_and_non_intersecting_final=1; diff --git a/tests/queries/0_stateless/03195_group_concat_deserialization_fix.reference b/tests/queries/0_stateless/03195_group_concat_deserialization_fix.reference new file mode 100644 index 00000000000..1696fc46554 --- /dev/null +++ b/tests/queries/0_stateless/03195_group_concat_deserialization_fix.reference @@ -0,0 +1,3 @@ +First +First +Second diff --git a/tests/queries/0_stateless/03195_group_concat_deserialization_fix.sql b/tests/queries/0_stateless/03195_group_concat_deserialization_fix.sql new file mode 100644 index 00000000000..337f1f3db24 --- /dev/null +++ b/tests/queries/0_stateless/03195_group_concat_deserialization_fix.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS test_serialization; + +CREATE TABLE test_serialization +( + id UInt64, + text AggregateFunction(groupConcat, String) +) ENGINE = AggregatingMergeTree() ORDER BY id; + +INSERT INTO test_serialization SELECT + 1, + groupConcatState('First'); + +SELECT groupConcatMerge(text) AS concatenated_text FROM test_serialization GROUP BY id; + +INSERT INTO test_serialization SELECT + 2, + groupConcatState('Second'); + +SELECT groupConcatMerge(text) AS concatenated_text FROM test_serialization GROUP BY id ORDER BY id; + +DROP TABLE IF EXISTS test_serialization; + + diff --git a/tests/queries/0_stateless/03196_local_memory_limit.reference b/tests/queries/0_stateless/03196_local_memory_limit.reference new file mode 100644 index 00000000000..f2e22e8aa5b --- /dev/null +++ b/tests/queries/0_stateless/03196_local_memory_limit.reference @@ -0,0 +1 @@ +maximum: 95.37 MiB diff --git a/tests/queries/0_stateless/03196_local_memory_limit.sh b/tests/queries/0_stateless/03196_local_memory_limit.sh new file mode 100755 index 00000000000..346b37be006 --- /dev/null +++ b/tests/queries/0_stateless/03196_local_memory_limit.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_LOCAL} --config-file <(echo "100M") --query "SELECT number FROM system.numbers GROUP BY number HAVING count() > 1" 2>&1 | grep -o -P 'maximum: [\d\.]+ MiB' diff --git a/tests/queries/0_stateless/03196_max_intersections_arena_crash.reference b/tests/queries/0_stateless/03196_max_intersections_arena_crash.reference new file mode 100644 index 00000000000..049e7107258 --- /dev/null +++ b/tests/queries/0_stateless/03196_max_intersections_arena_crash.reference @@ -0,0 +1,8 @@ +1 3 3 +1 6 3 +2 5 3 +3 7 3 +1 3 2 +1 6 2 +2 5 2 +3 7 2 diff --git a/tests/queries/0_stateless/03196_max_intersections_arena_crash.sql b/tests/queries/0_stateless/03196_max_intersections_arena_crash.sql new file mode 100644 index 00000000000..b7269d7c4e2 --- /dev/null +++ b/tests/queries/0_stateless/03196_max_intersections_arena_crash.sql @@ -0,0 +1,5 @@ +DROP TABLE IF EXISTS my_events; +CREATE TABLE my_events (start UInt32, end UInt32) Engine = MergeTree ORDER BY tuple() + AS Select * FROM VALUES ('start UInt32, end UInt32', (1, 3), (1, 6), (2, 5), (3, 7)); +SELECT start, end, maxIntersections(start, end) OVER () FROM my_events; +SELECT start, end, maxIntersectionsPosition(start, end) OVER () FROM my_events; diff --git a/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.reference b/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.reference new file mode 100644 index 00000000000..bd9ab3be3fa --- /dev/null +++ b/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.reference @@ -0,0 +1,2 @@ +2024-06-20 00:00:00 +2024-06-20 00:00:00 diff --git a/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.sql b/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.sql new file mode 100644 index 00000000000..e83738f7214 --- /dev/null +++ b/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.sql @@ -0,0 +1,2 @@ +SELECT parseDateTime('2024-06-20', '%F', 'UTC') AS x; +SELECT parseDateTime('06/20/24', '%D', 'UTC') AS x; diff --git a/tests/queries/0_stateless/data_json/key_ignore_case.json b/tests/queries/0_stateless/data_json/key_ignore_case.json new file mode 100644 index 00000000000..ad8f7cb4507 Binary files /dev/null and b/tests/queries/0_stateless/data_json/key_ignore_case.json differ diff --git a/utils/backup/backup b/utils/backup/backup new file mode 100755 index 00000000000..6aa9c179033 --- /dev/null +++ b/utils/backup/backup @@ -0,0 +1,47 @@ +#!/bin/bash + +user="default" +path="." + +usage() { + echo + echo "A trivial script to upload your files into ClickHouse." + echo "You might want to use something like Dropbox instead, but..." + echo + echo "Usage: $0 --host [--user ] --password " + exit 1 +} + +while [[ "$#" -gt 0 ]]; do + case "$1" in + --host) + host="$2" + shift 2 + ;; + --user) + user="$2" + shift 2 + ;; + --password) + password="$2" + shift 2 + ;; + --help) + usage + ;; + *) + path="$1" + shift 1 + ;; + esac +done + +if [ -z "$host" ] || [ -z "$password" ]; then + echo "Error: --host and --password are mandatory." 
+ usage +fi + +clickhouse-client --host "$host" --user "$user" --password "$password" --secure --query "CREATE TABLE IF NOT EXISTS default.files (time DEFAULT now(), path String, content String CODEC(ZSTD(6))) ENGINE = MergeTree ORDER BY (path, time)" && +find "$path" -type f | clickhouse-local --input-format LineAsString \ + --max-block-size 1 --min-insert-block-size-rows 0 --min-insert-block-size-bytes '100M' --max-insert-threads 1 \ + --query "INSERT INTO FUNCTION remoteSecure('$host', default.files, '$user', '$password') (path, content) SELECT line, file(line) FROM table" --progress diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 64ff3e8e2cb..229eccefa48 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -48,6 +48,7 @@ AutoML Autocompletion AvroConfluent BIGINT +bigrams BIGSERIAL BORO BSON @@ -574,6 +575,7 @@ MySQLDump MySQLThreads NATS NCHAR +NDJSON NEKUDOTAYIM NEWDATE NEWDECIMAL @@ -716,6 +718,8 @@ PlantUML PointDistKm PointDistM PointDistRads +PostHistory +PostLink PostgreSQLConnection PostgreSQLThreads Postgres @@ -963,6 +967,7 @@ ToGeoBoundary ToIPv ToParent ToSnowflake +ToSnowflakeID ToString ToUnicode Toolset @@ -1007,12 +1012,13 @@ UncompressedCacheBytes UncompressedCacheCells UnidirectionalEdgeIsValid UniqThetaSketch +unigrams Updatable Uppercased Uptime Uptrace -UrlDecode -UrlEncode +URLDecode +URLEncode UserID Util VARCHAR @@ -1458,6 +1464,7 @@ datatypes dateName dateTime dateTimeToSnowflake +dateTimeToSnowflakeID datetime datetimes dayofyear @@ -1505,9 +1512,11 @@ deserializing destructor destructors detectCharset +detectTonality detectLanguage detectLanguageMixed detectLanguageUnknown +detectProgrammingLanguage determinator deterministically dictGet @@ -1524,6 +1533,7 @@ disableProtocols disjunction disjunctions displaySecretsInShowAndSelect +displayName distro divideDecimal dmesg @@ -1541,6 +1551,7 @@ dumpColumnStructure durations ecto editDistance +editDistanceUTF embeddings emptyArray emptyArrayDate @@ -1573,6 +1584,10 @@ exFAT expiryMsec exponentialMovingAverage exponentialmovingaverage +exponentialTimeDecayedAvg +exponentialTimeDecayedCount +exponentialTimeDecayedMax +exponentialTimeDecayedSum expr exprN extendedVerification @@ -1791,6 +1806,7 @@ incrementing indexHint indexOf infi +infty inflight initcap initcapUTF @@ -1898,6 +1914,7 @@ lessOrEquals lessorequals levenshtein levenshteinDistance +levenshteinDistanceUTF lexicographically lgamma libFuzzer @@ -2248,6 +2265,7 @@ proleptic prometheus proportionsZTest proto +protocol protobuf protobufsingle proxied @@ -2476,6 +2494,7 @@ skewpop skewsamp skippingerrors sleepEachRow +snowflakeIDToDateTime snowflakeToDateTime socketcache soundex @@ -2500,6 +2519,7 @@ sqlite sqrt src srcReplicas +stackoverflow stacktrace stacktraces startsWith @@ -2838,6 +2858,7 @@ userver utils uuid uuidv +vCPU varPop varPopStable varSamp @@ -2847,7 +2868,9 @@ variantElement variantType varint varpop +varpopstable varsamp +varsampstable vectorized vectorscan vendoring diff --git a/utils/check-style/check-mypy b/utils/check-style/check-mypy index 42cb7fbbd15..4434377e627 100755 --- a/utils/check-style/check-mypy +++ b/utils/check-style/check-mypy @@ -11,13 +11,15 @@ GIT_ROOT=${GIT_ROOT:-.} CONFIG="$GIT_ROOT/tests/ci/.mypy.ini" DIRS=("$GIT_ROOT/tests/ci/" "$GIT_ROOT/tests/ci/"*/) tmp=$(mktemp) + for dir in "${DIRS[@]}"; do if ! compgen -G "$dir"/*.py > /dev/null; then continue fi - if ! 
mypy --config-file="$CONFIG" --sqlite-cache "$dir"/*.py > "$tmp" 2>&1; then + if ! mypy --config-file="$CONFIG" --sqlite-cache $(find "$dir" -maxdepth 1 -name "*.py" | grep -v "test_") > "$tmp" 2>&1; then echo "Errors while processing $dir": cat "$tmp" fi done + rm -rf "$tmp" diff --git a/utils/check-style/check-pylint b/utils/check-style/check-pylint index 7959a414023..8cfbc68ac96 100755 --- a/utils/check-style/check-pylint +++ b/utils/check-style/check-pylint @@ -10,6 +10,7 @@ function xargs-pylint { xargs -P "$(nproc)" -n "$1" pylint --rcfile="$ROOT_PATH/pyproject.toml" --persistent=no --score=n } -find "$ROOT_PATH/tests" -maxdepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 50 +# exclude ci unittest scripts from check: test_* +find "$ROOT_PATH/tests" -maxdepth 2 -type f -exec file -F' ' --mime-type {} + | grep -v "/test_" | xargs-pylint 50 # Beware, there lambdas are checked. All of them contain `app`, and it causes brain-cucumber-zalgo find "$ROOT_PATH/tests/ci" -mindepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 1 diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 7f25ca4607c..380656cd1ca 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -93,6 +93,7 @@ EXTERN_TYPES_EXCLUDES=( ErrorCodes::values ErrorCodes::values[i] ErrorCodes::getErrorCodeByName + ErrorCodes::Value ) for extern_type in ${!EXTERN_TYPES[@]}; do type_of_extern=${EXTERN_TYPES[$extern_type]} @@ -321,10 +322,14 @@ std_cerr_cout_excludes=( src/Client/LineReader.cpp src/Client/QueryFuzzer.cpp src/Client/Suggest.cpp + src/Client/ClientBase.h + src/Client/LineReader.h + src/Client/ReplxxLineReader.h src/Bridge/IBridge.cpp src/Daemon/BaseDaemon.cpp src/Loggers/Loggers.cpp src/Common/GWPAsan.cpp + src/Common/ProgressIndication.h ) sources_with_std_cerr_cout=( $( find $ROOT_PATH/{src,base} -name '*.h' -or -name '*.cpp' | \ diff --git a/utils/grpc-client/pb2/clickhouse_grpc_pb2.py b/utils/grpc-client/pb2/clickhouse_grpc_pb2.py index 6218047af3c..1e2c63012f3 100644 --- a/utils/grpc-client/pb2/clickhouse_grpc_pb2.py +++ b/utils/grpc-client/pb2/clickhouse_grpc_pb2.py @@ -1,13 +1,12 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: clickhouse_grpc.proto +# Protobuf Python Version: 4.25.3 """Generated protocol buffer code.""" -from google.protobuf.internal import enum_type_wrapper from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() @@ -15,149 +14,45 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x15\x63lickhouse_grpc.proto\x12\x0f\x63lickhouse.grpc\")\n\x0bNameAndType\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\"\xf5\x01\n\rExternalTable\x12\x0c\n\x04name\x18\x01 \x01(\t\x12-\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x1c.clickhouse.grpc.NameAndType\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\x12\x0e\n\x06\x66ormat\x18\x04 \x01(\t\x12\x18\n\x10\x63ompression_type\x18\x06 \x01(\t\x12>\n\x08settings\x18\x05 \x03(\x0b\x32,.clickhouse.grpc.ExternalTable.SettingsEntry\x1a/\n\rSettingsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\x85\x03\n\x1cObsoleteTransportCompression\x12U\n\talgorithm\x18\x01 \x01(\x0e\x32\x42.clickhouse.grpc.ObsoleteTransportCompression.CompressionAlgorithm\x12M\n\x05level\x18\x02 \x01(\x0e\x32>.clickhouse.grpc.ObsoleteTransportCompression.CompressionLevel\"R\n\x14\x43ompressionAlgorithm\x12\x12\n\x0eNO_COMPRESSION\x10\x00\x12\x0b\n\x07\x44\x45\x46LATE\x10\x01\x12\x08\n\x04GZIP\x10\x02\x12\x0f\n\x0bSTREAM_GZIP\x10\x03\"k\n\x10\x43ompressionLevel\x12\x14\n\x10\x43OMPRESSION_NONE\x10\x00\x12\x13\n\x0f\x43OMPRESSION_LOW\x10\x01\x12\x16\n\x12\x43OMPRESSION_MEDIUM\x10\x02\x12\x14\n\x10\x43OMPRESSION_HIGH\x10\x03\"\x8e\x06\n\tQueryInfo\x12\r\n\x05query\x18\x01 \x01(\t\x12\x10\n\x08query_id\x18\x02 \x01(\t\x12:\n\x08settings\x18\x03 \x03(\x0b\x32(.clickhouse.grpc.QueryInfo.SettingsEntry\x12\x10\n\x08\x64\x61tabase\x18\x04 \x01(\t\x12\x12\n\ninput_data\x18\x05 \x01(\x0c\x12\x1c\n\x14input_data_delimiter\x18\x06 \x01(\x0c\x12\x15\n\routput_format\x18\x07 \x01(\t\x12\x1b\n\x13send_output_columns\x18\x18 \x01(\x08\x12\x37\n\x0f\x65xternal_tables\x18\x08 \x03(\x0b\x32\x1e.clickhouse.grpc.ExternalTable\x12\x11\n\tuser_name\x18\t \x01(\t\x12\x10\n\x08password\x18\n \x01(\t\x12\r\n\x05quota\x18\x0b \x01(\t\x12\x12\n\nsession_id\x18\x0c \x01(\t\x12\x15\n\rsession_check\x18\r \x01(\x08\x12\x17\n\x0fsession_timeout\x18\x0e \x01(\r\x12\x0e\n\x06\x63\x61ncel\x18\x0f \x01(\x08\x12\x17\n\x0fnext_query_info\x18\x10 \x01(\x08\x12\x1e\n\x16input_compression_type\x18\x14 \x01(\t\x12\x1f\n\x17output_compression_type\x18\x15 \x01(\t\x12 \n\x18output_compression_level\x18\x13 \x01(\x05\x12\"\n\x1atransport_compression_type\x18\x16 \x01(\t\x12#\n\x1btransport_compression_level\x18\x17 \x01(\x05\x12R\n\x1bobsolete_result_compression\x18\x11 \x01(\x0b\x32-.clickhouse.grpc.ObsoleteTransportCompression\x12!\n\x19obsolete_compression_type\x18\x12 \x01(\t\x1a/\n\rSettingsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xa1\x01\n\x08LogEntry\x12\x0c\n\x04time\x18\x01 \x01(\r\x12\x19\n\x11time_microseconds\x18\x02 \x01(\r\x12\x11\n\tthread_id\x18\x03 \x01(\x04\x12\x10\n\x08query_id\x18\x04 \x01(\t\x12)\n\x05level\x18\x05 \x01(\x0e\x32\x1a.clickhouse.grpc.LogsLevel\x12\x0e\n\x06source\x18\x06 
\x01(\t\x12\x0c\n\x04text\x18\x07 \x01(\t\"z\n\x08Progress\x12\x11\n\tread_rows\x18\x01 \x01(\x04\x12\x12\n\nread_bytes\x18\x02 \x01(\x04\x12\x1a\n\x12total_rows_to_read\x18\x03 \x01(\x04\x12\x14\n\x0cwritten_rows\x18\x04 \x01(\x04\x12\x15\n\rwritten_bytes\x18\x05 \x01(\x04\"p\n\x05Stats\x12\x0c\n\x04rows\x18\x01 \x01(\x04\x12\x0e\n\x06\x62locks\x18\x02 \x01(\x04\x12\x17\n\x0f\x61llocated_bytes\x18\x03 \x01(\x04\x12\x15\n\rapplied_limit\x18\x04 \x01(\x08\x12\x19\n\x11rows_before_limit\x18\x05 \x01(\x04\"R\n\tException\x12\x0c\n\x04\x63ode\x18\x01 \x01(\x05\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x14\n\x0c\x64isplay_text\x18\x03 \x01(\t\x12\x13\n\x0bstack_trace\x18\x04 \x01(\t\"\xeb\x02\n\x06Result\x12\x10\n\x08query_id\x18\t \x01(\t\x12\x11\n\ttime_zone\x18\n \x01(\t\x12\x15\n\routput_format\x18\x0b \x01(\t\x12\x34\n\x0eoutput_columns\x18\x0c \x03(\x0b\x32\x1c.clickhouse.grpc.NameAndType\x12\x0e\n\x06output\x18\x01 \x01(\x0c\x12\x0e\n\x06totals\x18\x02 \x01(\x0c\x12\x10\n\x08\x65xtremes\x18\x03 \x01(\x0c\x12\'\n\x04logs\x18\x04 \x03(\x0b\x32\x19.clickhouse.grpc.LogEntry\x12+\n\x08progress\x18\x05 \x01(\x0b\x32\x19.clickhouse.grpc.Progress\x12%\n\x05stats\x18\x06 \x01(\x0b\x32\x16.clickhouse.grpc.Stats\x12-\n\texception\x18\x07 \x01(\x0b\x32\x1a.clickhouse.grpc.Exception\x12\x11\n\tcancelled\x18\x08 \x01(\x08*\x9d\x01\n\tLogsLevel\x12\x0c\n\x08LOG_NONE\x10\x00\x12\r\n\tLOG_FATAL\x10\x01\x12\x10\n\x0cLOG_CRITICAL\x10\x02\x12\r\n\tLOG_ERROR\x10\x03\x12\x0f\n\x0bLOG_WARNING\x10\x04\x12\x0e\n\nLOG_NOTICE\x10\x05\x12\x13\n\x0fLOG_INFORMATION\x10\x06\x12\r\n\tLOG_DEBUG\x10\x07\x12\r\n\tLOG_TRACE\x10\x08\x32\xdb\x02\n\nClickHouse\x12\x45\n\x0c\x45xecuteQuery\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00\x12V\n\x1b\x45xecuteQueryWithStreamInput\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00(\x01\x12W\n\x1c\x45xecuteQueryWithStreamOutput\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00\x30\x01\x12U\n\x18\x45xecuteQueryWithStreamIO\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00(\x01\x30\x01\x62\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x15\x63lickhouse_grpc.proto\x12\x0f\x63lickhouse.grpc\")\n\x0bNameAndType\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\"\xf5\x01\n\rExternalTable\x12\x0c\n\x04name\x18\x01 \x01(\t\x12-\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x1c.clickhouse.grpc.NameAndType\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\x12\x0e\n\x06\x66ormat\x18\x04 \x01(\t\x12\x18\n\x10\x63ompression_type\x18\x06 \x01(\t\x12>\n\x08settings\x18\x05 \x03(\x0b\x32,.clickhouse.grpc.ExternalTable.SettingsEntry\x1a/\n\rSettingsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\x85\x03\n\x1cObsoleteTransportCompression\x12U\n\talgorithm\x18\x01 \x01(\x0e\x32\x42.clickhouse.grpc.ObsoleteTransportCompression.CompressionAlgorithm\x12M\n\x05level\x18\x02 \x01(\x0e\x32>.clickhouse.grpc.ObsoleteTransportCompression.CompressionLevel\"R\n\x14\x43ompressionAlgorithm\x12\x12\n\x0eNO_COMPRESSION\x10\x00\x12\x0b\n\x07\x44\x45\x46LATE\x10\x01\x12\x08\n\x04GZIP\x10\x02\x12\x0f\n\x0bSTREAM_GZIP\x10\x03\"k\n\x10\x43ompressionLevel\x12\x14\n\x10\x43OMPRESSION_NONE\x10\x00\x12\x13\n\x0f\x43OMPRESSION_LOW\x10\x01\x12\x16\n\x12\x43OMPRESSION_MEDIUM\x10\x02\x12\x14\n\x10\x43OMPRESSION_HIGH\x10\x03\"\x9b\x06\n\tQueryInfo\x12\r\n\x05query\x18\x01 \x01(\t\x12\x10\n\x08query_id\x18\x02 \x01(\t\x12:\n\x08settings\x18\x03 
\x03(\x0b\x32(.clickhouse.grpc.QueryInfo.SettingsEntry\x12\x10\n\x08\x64\x61tabase\x18\x04 \x01(\t\x12\x12\n\ninput_data\x18\x05 \x01(\x0c\x12\x1c\n\x14input_data_delimiter\x18\x06 \x01(\x0c\x12\x15\n\routput_format\x18\x07 \x01(\t\x12\x1b\n\x13send_output_columns\x18\x18 \x01(\x08\x12\x37\n\x0f\x65xternal_tables\x18\x08 \x03(\x0b\x32\x1e.clickhouse.grpc.ExternalTable\x12\x11\n\tuser_name\x18\t \x01(\t\x12\x10\n\x08password\x18\n \x01(\t\x12\r\n\x05quota\x18\x0b \x01(\t\x12\x0b\n\x03jwt\x18\x19 \x01(\t\x12\x12\n\nsession_id\x18\x0c \x01(\t\x12\x15\n\rsession_check\x18\r \x01(\x08\x12\x17\n\x0fsession_timeout\x18\x0e \x01(\r\x12\x0e\n\x06\x63\x61ncel\x18\x0f \x01(\x08\x12\x17\n\x0fnext_query_info\x18\x10 \x01(\x08\x12\x1e\n\x16input_compression_type\x18\x14 \x01(\t\x12\x1f\n\x17output_compression_type\x18\x15 \x01(\t\x12 \n\x18output_compression_level\x18\x13 \x01(\x05\x12\"\n\x1atransport_compression_type\x18\x16 \x01(\t\x12#\n\x1btransport_compression_level\x18\x17 \x01(\x05\x12R\n\x1bobsolete_result_compression\x18\x11 \x01(\x0b\x32-.clickhouse.grpc.ObsoleteTransportCompression\x12!\n\x19obsolete_compression_type\x18\x12 \x01(\t\x1a/\n\rSettingsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xa1\x01\n\x08LogEntry\x12\x0c\n\x04time\x18\x01 \x01(\r\x12\x19\n\x11time_microseconds\x18\x02 \x01(\r\x12\x11\n\tthread_id\x18\x03 \x01(\x04\x12\x10\n\x08query_id\x18\x04 \x01(\t\x12)\n\x05level\x18\x05 \x01(\x0e\x32\x1a.clickhouse.grpc.LogsLevel\x12\x0e\n\x06source\x18\x06 \x01(\t\x12\x0c\n\x04text\x18\x07 \x01(\t\"z\n\x08Progress\x12\x11\n\tread_rows\x18\x01 \x01(\x04\x12\x12\n\nread_bytes\x18\x02 \x01(\x04\x12\x1a\n\x12total_rows_to_read\x18\x03 \x01(\x04\x12\x14\n\x0cwritten_rows\x18\x04 \x01(\x04\x12\x15\n\rwritten_bytes\x18\x05 \x01(\x04\"p\n\x05Stats\x12\x0c\n\x04rows\x18\x01 \x01(\x04\x12\x0e\n\x06\x62locks\x18\x02 \x01(\x04\x12\x17\n\x0f\x61llocated_bytes\x18\x03 \x01(\x04\x12\x15\n\rapplied_limit\x18\x04 \x01(\x08\x12\x19\n\x11rows_before_limit\x18\x05 \x01(\x04\"R\n\tException\x12\x0c\n\x04\x63ode\x18\x01 \x01(\x05\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x14\n\x0c\x64isplay_text\x18\x03 \x01(\t\x12\x13\n\x0bstack_trace\x18\x04 \x01(\t\"\xeb\x02\n\x06Result\x12\x10\n\x08query_id\x18\t \x01(\t\x12\x11\n\ttime_zone\x18\n \x01(\t\x12\x15\n\routput_format\x18\x0b \x01(\t\x12\x34\n\x0eoutput_columns\x18\x0c \x03(\x0b\x32\x1c.clickhouse.grpc.NameAndType\x12\x0e\n\x06output\x18\x01 \x01(\x0c\x12\x0e\n\x06totals\x18\x02 \x01(\x0c\x12\x10\n\x08\x65xtremes\x18\x03 \x01(\x0c\x12\'\n\x04logs\x18\x04 \x03(\x0b\x32\x19.clickhouse.grpc.LogEntry\x12+\n\x08progress\x18\x05 \x01(\x0b\x32\x19.clickhouse.grpc.Progress\x12%\n\x05stats\x18\x06 \x01(\x0b\x32\x16.clickhouse.grpc.Stats\x12-\n\texception\x18\x07 \x01(\x0b\x32\x1a.clickhouse.grpc.Exception\x12\x11\n\tcancelled\x18\x08 
\x01(\x08*\x9d\x01\n\tLogsLevel\x12\x0c\n\x08LOG_NONE\x10\x00\x12\r\n\tLOG_FATAL\x10\x01\x12\x10\n\x0cLOG_CRITICAL\x10\x02\x12\r\n\tLOG_ERROR\x10\x03\x12\x0f\n\x0bLOG_WARNING\x10\x04\x12\x0e\n\nLOG_NOTICE\x10\x05\x12\x13\n\x0fLOG_INFORMATION\x10\x06\x12\r\n\tLOG_DEBUG\x10\x07\x12\r\n\tLOG_TRACE\x10\x08\x32\xdb\x02\n\nClickHouse\x12\x45\n\x0c\x45xecuteQuery\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00\x12V\n\x1b\x45xecuteQueryWithStreamInput\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00(\x01\x12W\n\x1c\x45xecuteQueryWithStreamOutput\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00\x30\x01\x12U\n\x18\x45xecuteQueryWithStreamIO\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00(\x01\x30\x01\x62\x06proto3')
-_LOGSLEVEL = DESCRIPTOR.enum_types_by_name['LogsLevel']
-LogsLevel = enum_type_wrapper.EnumTypeWrapper(_LOGSLEVEL)
-LOG_NONE = 0
-LOG_FATAL = 1
-LOG_CRITICAL = 2
-LOG_ERROR = 3
-LOG_WARNING = 4
-LOG_NOTICE = 5
-LOG_INFORMATION = 6
-LOG_DEBUG = 7
-LOG_TRACE = 8
-
-
-_NAMEANDTYPE = DESCRIPTOR.message_types_by_name['NameAndType']
-_EXTERNALTABLE = DESCRIPTOR.message_types_by_name['ExternalTable']
-_EXTERNALTABLE_SETTINGSENTRY = _EXTERNALTABLE.nested_types_by_name['SettingsEntry']
-_OBSOLETETRANSPORTCOMPRESSION = DESCRIPTOR.message_types_by_name['ObsoleteTransportCompression']
-_QUERYINFO = DESCRIPTOR.message_types_by_name['QueryInfo']
-_QUERYINFO_SETTINGSENTRY = _QUERYINFO.nested_types_by_name['SettingsEntry']
-_LOGENTRY = DESCRIPTOR.message_types_by_name['LogEntry']
-_PROGRESS = DESCRIPTOR.message_types_by_name['Progress']
-_STATS = DESCRIPTOR.message_types_by_name['Stats']
-_EXCEPTION = DESCRIPTOR.message_types_by_name['Exception']
-_RESULT = DESCRIPTOR.message_types_by_name['Result']
-_OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM = _OBSOLETETRANSPORTCOMPRESSION.enum_types_by_name['CompressionAlgorithm']
-_OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL = _OBSOLETETRANSPORTCOMPRESSION.enum_types_by_name['CompressionLevel']
-NameAndType = _reflection.GeneratedProtocolMessageType('NameAndType', (_message.Message,), {
-  'DESCRIPTOR' : _NAMEANDTYPE,
-  '__module__' : 'clickhouse_grpc_pb2'
-  # @@protoc_insertion_point(class_scope:clickhouse.grpc.NameAndType)
-  })
-_sym_db.RegisterMessage(NameAndType)
-
-ExternalTable = _reflection.GeneratedProtocolMessageType('ExternalTable', (_message.Message,), {
-
-  'SettingsEntry' : _reflection.GeneratedProtocolMessageType('SettingsEntry', (_message.Message,), {
-    'DESCRIPTOR' : _EXTERNALTABLE_SETTINGSENTRY,
-    '__module__' : 'clickhouse_grpc_pb2'
-    # @@protoc_insertion_point(class_scope:clickhouse.grpc.ExternalTable.SettingsEntry)
-    })
-  ,
-  'DESCRIPTOR' : _EXTERNALTABLE,
-  '__module__' : 'clickhouse_grpc_pb2'
-  # @@protoc_insertion_point(class_scope:clickhouse.grpc.ExternalTable)
-  })
-_sym_db.RegisterMessage(ExternalTable)
-_sym_db.RegisterMessage(ExternalTable.SettingsEntry)
-
-ObsoleteTransportCompression = _reflection.GeneratedProtocolMessageType('ObsoleteTransportCompression', (_message.Message,), {
-  'DESCRIPTOR' : _OBSOLETETRANSPORTCOMPRESSION,
-  '__module__' : 'clickhouse_grpc_pb2'
-  # @@protoc_insertion_point(class_scope:clickhouse.grpc.ObsoleteTransportCompression)
-  })
-_sym_db.RegisterMessage(ObsoleteTransportCompression)
-
-QueryInfo = _reflection.GeneratedProtocolMessageType('QueryInfo', (_message.Message,), {
-
-  'SettingsEntry' : _reflection.GeneratedProtocolMessageType('SettingsEntry', (_message.Message,), {
-    'DESCRIPTOR' : _QUERYINFO_SETTINGSENTRY,
-    '__module__' : 'clickhouse_grpc_pb2'
-    # @@protoc_insertion_point(class_scope:clickhouse.grpc.QueryInfo.SettingsEntry)
-    })
-  ,
-  'DESCRIPTOR' : _QUERYINFO,
-  '__module__' : 'clickhouse_grpc_pb2'
-  # @@protoc_insertion_point(class_scope:clickhouse.grpc.QueryInfo)
-  })
-_sym_db.RegisterMessage(QueryInfo)
-_sym_db.RegisterMessage(QueryInfo.SettingsEntry)
-
-LogEntry = _reflection.GeneratedProtocolMessageType('LogEntry', (_message.Message,), {
-  'DESCRIPTOR' : _LOGENTRY,
-  '__module__' : 'clickhouse_grpc_pb2'
-  # @@protoc_insertion_point(class_scope:clickhouse.grpc.LogEntry)
-  })
-_sym_db.RegisterMessage(LogEntry)
-
-Progress = _reflection.GeneratedProtocolMessageType('Progress', (_message.Message,), {
-  'DESCRIPTOR' : _PROGRESS,
-  '__module__' : 'clickhouse_grpc_pb2'
-  # @@protoc_insertion_point(class_scope:clickhouse.grpc.Progress)
-  })
-_sym_db.RegisterMessage(Progress)
-
-Stats = _reflection.GeneratedProtocolMessageType('Stats', (_message.Message,), {
-  'DESCRIPTOR' : _STATS,
-  '__module__' : 'clickhouse_grpc_pb2'
-  # @@protoc_insertion_point(class_scope:clickhouse.grpc.Stats)
-  })
-_sym_db.RegisterMessage(Stats)
-
-Exception = _reflection.GeneratedProtocolMessageType('Exception', (_message.Message,), {
-  'DESCRIPTOR' : _EXCEPTION,
-  '__module__' : 'clickhouse_grpc_pb2'
-  # @@protoc_insertion_point(class_scope:clickhouse.grpc.Exception)
-  })
-_sym_db.RegisterMessage(Exception)
-
-Result = _reflection.GeneratedProtocolMessageType('Result', (_message.Message,), {
-  'DESCRIPTOR' : _RESULT,
-  '__module__' : 'clickhouse_grpc_pb2'
-  # @@protoc_insertion_point(class_scope:clickhouse.grpc.Result)
-  })
-_sym_db.RegisterMessage(Result)
-
-_CLICKHOUSE = DESCRIPTOR.services_by_name['ClickHouse']
+_globals = globals()
+_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'clickhouse_grpc_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
-  DESCRIPTOR._options = None
-  _EXTERNALTABLE_SETTINGSENTRY._options = None
-  _EXTERNALTABLE_SETTINGSENTRY._serialized_options = b'8\001'
-  _QUERYINFO_SETTINGSENTRY._options = None
-  _QUERYINFO_SETTINGSENTRY._serialized_options = b'8\001'
-  _LOGSLEVEL._serialized_start=2363
-  _LOGSLEVEL._serialized_end=2520
-  _NAMEANDTYPE._serialized_start=42
-  _NAMEANDTYPE._serialized_end=83
-  _EXTERNALTABLE._serialized_start=86
-  _EXTERNALTABLE._serialized_end=331
-  _EXTERNALTABLE_SETTINGSENTRY._serialized_start=284
-  _EXTERNALTABLE_SETTINGSENTRY._serialized_end=331
-  _OBSOLETETRANSPORTCOMPRESSION._serialized_start=334
-  _OBSOLETETRANSPORTCOMPRESSION._serialized_end=723
-  _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM._serialized_start=532
-  _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM._serialized_end=614
-  _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL._serialized_start=616
-  _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL._serialized_end=723
-  _QUERYINFO._serialized_start=726
-  _QUERYINFO._serialized_end=1508
-  _QUERYINFO_SETTINGSENTRY._serialized_start=284
-  _QUERYINFO_SETTINGSENTRY._serialized_end=331
-  _LOGENTRY._serialized_start=1511
-  _LOGENTRY._serialized_end=1672
-  _PROGRESS._serialized_start=1674
-  _PROGRESS._serialized_end=1796
-  _STATS._serialized_start=1798
-  _STATS._serialized_end=1910
-  _EXCEPTION._serialized_start=1912
-  _EXCEPTION._serialized_end=1994
-  _RESULT._serialized_start=1997
-  _RESULT._serialized_end=2360
-  _CLICKHOUSE._serialized_start=2523
-  _CLICKHOUSE._serialized_end=2870
+  _globals['_EXTERNALTABLE_SETTINGSENTRY']._options = None
+  _globals['_EXTERNALTABLE_SETTINGSENTRY']._serialized_options = b'8\001'
+  _globals['_QUERYINFO_SETTINGSENTRY']._options = None
+  _globals['_QUERYINFO_SETTINGSENTRY']._serialized_options = b'8\001'
+  _globals['_LOGSLEVEL']._serialized_start=2376
+  _globals['_LOGSLEVEL']._serialized_end=2533
+  _globals['_NAMEANDTYPE']._serialized_start=42
+  _globals['_NAMEANDTYPE']._serialized_end=83
+  _globals['_EXTERNALTABLE']._serialized_start=86
+  _globals['_EXTERNALTABLE']._serialized_end=331
+  _globals['_EXTERNALTABLE_SETTINGSENTRY']._serialized_start=284
+  _globals['_EXTERNALTABLE_SETTINGSENTRY']._serialized_end=331
+  _globals['_OBSOLETETRANSPORTCOMPRESSION']._serialized_start=334
+  _globals['_OBSOLETETRANSPORTCOMPRESSION']._serialized_end=723
+  _globals['_OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM']._serialized_start=532
+  _globals['_OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM']._serialized_end=614
+  _globals['_OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL']._serialized_start=616
+  _globals['_OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL']._serialized_end=723
+  _globals['_QUERYINFO']._serialized_start=726
+  _globals['_QUERYINFO']._serialized_end=1521
+  _globals['_QUERYINFO_SETTINGSENTRY']._serialized_start=284
+  _globals['_QUERYINFO_SETTINGSENTRY']._serialized_end=331
+  _globals['_LOGENTRY']._serialized_start=1524
+  _globals['_LOGENTRY']._serialized_end=1685
+  _globals['_PROGRESS']._serialized_start=1687
+  _globals['_PROGRESS']._serialized_end=1809
+  _globals['_STATS']._serialized_start=1811
+  _globals['_STATS']._serialized_end=1923
+  _globals['_EXCEPTION']._serialized_start=1925
+  _globals['_EXCEPTION']._serialized_end=2007
+  _globals['_RESULT']._serialized_start=2010
+  _globals['_RESULT']._serialized_end=2373
+  _globals['_CLICKHOUSE']._serialized_start=2536
+  _globals['_CLICKHOUSE']._serialized_end=2883
 # @@protoc_insertion_point(module_scope)
diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp
index ed7e09685f0..5ae4c7a0b1c 100644
--- a/utils/keeper-bench/Runner.cpp
+++ b/utils/keeper-bench/Runner.cpp
@@ -1238,9 +1238,13 @@ void Runner::createConnections()
 
 std::shared_ptr Runner::getConnection(const ConnectionInfo & connection_info, size_t connection_info_idx)
 {
-    Coordination::ZooKeeper::Node node{Poco::Net::SocketAddress{connection_info.host}, static_cast(connection_info_idx), connection_info.secure};
-    std::vector nodes;
-    nodes.push_back(node);
+    zkutil::ShuffleHost host;
+    host.host = connection_info.host;
+    host.secure = connection_info.secure;
+    host.original_index = static_cast(connection_info_idx);
+    host.address = Poco::Net::SocketAddress{connection_info.host};
+
+    zkutil::ShuffleHosts nodes{host};
     zkutil::ZooKeeperArgs args;
     args.session_timeout_ms = connection_info.session_timeout_ms;
     args.connection_timeout_ms = connection_info.connection_timeout_ms;
diff --git a/utils/zookeeper-cli/CMakeLists.txt b/utils/zookeeper-cli/CMakeLists.txt
index cad7164b775..fd2fa669f40 100644
--- a/utils/zookeeper-cli/CMakeLists.txt
+++ b/utils/zookeeper-cli/CMakeLists.txt
@@ -3,4 +3,6 @@ clickhouse_add_executable(clickhouse-zookeeper-cli
     ${ClickHouse_SOURCE_DIR}/src/Client/LineReader.cpp)
 target_link_libraries(clickhouse-zookeeper-cli PRIVATE
     clickhouse_common_zookeeper_no_log
-    dbms)
+    dbms
+    clickhouse_functions
+)
diff --git a/utils/zookeeper-dump-tree/CMakeLists.txt b/utils/zookeeper-dump-tree/CMakeLists.txt
index 85e4d18c19f..3f3df65776a 100644
--- a/utils/zookeeper-dump-tree/CMakeLists.txt
+++ b/utils/zookeeper-dump-tree/CMakeLists.txt
@@ -3,4 +3,5 @@ target_link_libraries(zookeeper-dump-tree PRIVATE
     clickhouse_common_zookeeper_no_log
     clickhouse_common_io
     dbms
+    clickhouse_functions
     boost::program_options)
diff --git a/utils/zookeeper-remove-by-list/CMakeLists.txt b/utils/zookeeper-remove-by-list/CMakeLists.txt
index 50aaed76110..a4d7dccef65 100644
--- a/utils/zookeeper-remove-by-list/CMakeLists.txt
+++ b/utils/zookeeper-remove-by-list/CMakeLists.txt
@@ -2,4 +2,5 @@ clickhouse_add_executable (zookeeper-remove-by-list main.cpp ${SRCS})
 target_link_libraries(zookeeper-remove-by-list PRIVATE
     clickhouse_common_zookeeper_no_log
     dbms
+    clickhouse_functions
     boost::program_options)