diff --git a/.github/actions/release/action.yml b/.github/actions/release/action.yml index 99ec02662f6..c3897682a33 100644 --- a/.github/actions/release/action.yml +++ b/.github/actions/release/action.yml @@ -62,8 +62,8 @@ runs: if: ${{ inputs.type == 'patch' }} shell: bash run: | - python3 ./tests/ci/create_release.py --set-progress-started --progress "update ChangeLog" - [ "$(git branch --show-current)" != "master" ] && echo "not on the master" && exit 1 + git checkout master + python3 ./tests/ci/create_release.py --set-progress-started --progress "update changelog, docker version, security" echo "List versions" ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv echo "Update docker version" @@ -96,17 +96,13 @@ runs: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }} ### Changelog category (leave one): - Not for changelog (changelog entry is not required) - - name: Reset changes if Dry-run - if: ${{ inputs.dry-run }} + - name: Complete previous steps and Restore git state + if: ${{ inputs.type == 'patch' }} shell: bash run: | - git reset --hard HEAD - - name: Checkout back to GITHUB_REF - shell: bash - run: | - git checkout "$GITHUB_REF_NAME" - # set current progress to OK python3 ./tests/ci/create_release.py --set-progress-completed + git reset --hard HEAD + git checkout "$GITHUB_REF_NAME" - name: Create GH Release shell: bash if: ${{ inputs.type == 'patch' }} @@ -146,24 +142,23 @@ runs: if: ${{ inputs.type == 'patch' }} shell: bash run: | - python3 ./tests/ci/create_release.py --set-progress-started --progress "docker server release" cd "./tests/ci" + python3 ./create_release.py --set-progress-started --progress "docker server release" export CHECK_NAME="Docker server image" python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} - python3 ./tests/ci/create_release.py --set-progress-completed + python3 ./create_release.py --set-progress-completed - name: Docker clickhouse/clickhouse-keeper building if: ${{ inputs.type == 'patch' }} shell: bash run: | - python3 ./tests/ci/create_release.py --set-progress-started --progress "docker keeper release" cd "./tests/ci" + python3 ./create_release.py --set-progress-started --progress "docker keeper release" export CHECK_NAME="Docker keeper image" python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} - python3 ./tests/ci/create_release.py --set-progress-completed - - name: Set Release progress completed + python3 ./create_release.py --set-progress-completed + - name: Set current Release progress to Completed with OK shell: bash run: | - # If we here - set completed status, to post proper Slack OK or FAIL message in the next step python3 ./tests/ci/create_release.py --set-progress-started --progress "completed" python3 ./tests/ci/create_release.py --set-progress-completed - name: Post Slack Message diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 9645d0e46de..322946ac77b 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -241,8 +241,9 @@ jobs: runner_type: stress-tester data: ${{ needs.RunConfig.outputs.data }} FinishCheck: - if: ${{ !failure() && !cancelled() }} + if: ${{ !cancelled() }} needs: + - RunConfig - Builds_Report - FunctionalStatelessTestAsan - FunctionalStatefulTestDebug @@ -257,6 +258,7 @@ jobs: with: clear-repository: true - name: Finish label + if: ${{ !failure() }} run: | cd "$GITHUB_WORKSPACE/tests/ci" # update mergeable check @@ -264,3 +266,10 @@ jobs: # update overall ci report python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} python3 merge_pr.py + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat > "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/jepsen.yml b/.github/workflows/jepsen.yml index db837ac1ec7..035ba2e5b98 100644 --- a/.github/workflows/jepsen.yml +++ b/.github/workflows/jepsen.yml @@ -9,19 +9,64 @@ on: # yamllint disable-line rule:truthy - cron: '0 */6 * * *' workflow_dispatch: jobs: + RunConfig: + runs-on: [self-hosted, style-checker-aarch64] + outputs: + data: ${{ steps.runconfig.outputs.CI_DATA }} + steps: + - name: DebugInfo + uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6 + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true # to ensure correct digests + fetch-depth: 0 # to get version + filter: tree:0 + - name: PrepareRunConfig + id: runconfig + run: | + echo "::group::configure CI run" + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --workflow "$GITHUB_WORKFLOW" --outfile ${{ runner.temp }}/ci_run_data.json + echo "::endgroup::" + + echo "::group::CI run configure results" + python3 -m json.tool ${{ runner.temp }}/ci_run_data.json + echo "::endgroup::" + { + echo 'CI_DATA<> "$GITHUB_OUTPUT" KeeperJepsenRelease: - uses: ./.github/workflows/reusable_simple_job.yml + needs: [RunConfig] + uses: ./.github/workflows/reusable_test.yml with: - test_name: Jepsen keeper check - runner_type: style-checker - report_required: true + test_name: ClickHouse Keeper Jepsen + runner_type: style-checker-aarch64 + data: ${{ needs.RunConfig.outputs.data }} run_command: | python3 jepsen_check.py keeper - # ServerJepsenRelease: - # uses: ./.github/workflows/reusable_simple_job.yml - # with: - # test_name: Jepsen server check - # runner_type: style-checker - # run_command: | - # cd "$REPO_COPY/tests/ci" - # python3 jepsen_check.py server + ServerJepsenRelease: + if: false # skip for server + needs: [RunConfig] + uses: ./.github/workflows/reusable_test.yml + with: + test_name: ClickHouse Server Jepsen + runner_type: style-checker-aarch64 + data: ${{ needs.RunConfig.outputs.data }} + run_command: | + python3 jepsen_check.py server + CheckWorkflow: + if: ${{ !cancelled() }} + needs: [RunConfig, ServerJepsenRelease, KeeperJepsenRelease] + runs-on: [self-hosted, style-checker-aarch64] + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 09acef5eb8b..2ce1124404f 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -93,21 +93,21 @@ jobs: with: stage: Builds_2 data: ${{ needs.RunConfig.outputs.data }} - Tests_2: + Tests_2_ww: needs: [RunConfig, Builds_2] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2_ww') }} + uses: ./.github/workflows/reusable_test_stage.yml + with: + stage: Tests_2_ww + data: ${{ needs.RunConfig.outputs.data }} + Tests_2: + # Test_3 should not wait for Test_1/Test_2 and should not be blocked by them on master branch since all jobs need to run there. + needs: [RunConfig, Builds_1] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }} uses: ./.github/workflows/reusable_test_stage.yml with: stage: Tests_2 data: ${{ needs.RunConfig.outputs.data }} - Tests_3: - # Test_3 should not wait for Test_1/Test_2 and should not be blocked by them on master branch since all jobs need to run there. - needs: [RunConfig, Builds_1] - if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }} - uses: ./.github/workflows/reusable_test_stage.yml - with: - stage: Tests_3 - data: ${{ needs.RunConfig.outputs.data }} ################################# Reports ################################# # Reports should run even if Builds_1/2 fail - run them separately, not in Tests_1/2/3 @@ -121,37 +121,9 @@ jobs: runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} - MarkReleaseReady: - if: ${{ !failure() && !cancelled() }} - needs: [RunConfig, Builds_1, Builds_2] - runs-on: [self-hosted, style-checker-aarch64] - steps: - - name: Debug - run: | - echo need with different filters - cat << 'EOF' - ${{ toJSON(needs) }} - ${{ toJSON(needs.*.result) }} - no failures ${{ !contains(needs.*.result, 'failure') }} - no skips ${{ !contains(needs.*.result, 'skipped') }} - no both ${{ !(contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} - EOF - - name: Not ready - # fail the job to be able to restart it - if: ${{ contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure') }} - run: exit 1 - - name: Check out repository code - if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} - uses: ClickHouse/checkout@v1 - - name: Mark Commit Release Ready - if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 mark_release_ready.py - FinishCheck: if: ${{ !cancelled() }} - needs: [RunConfig, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3] + needs: [RunConfig, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2_ww, Tests_2] runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code @@ -160,3 +132,11 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + - name: Check Workflow results + if: ${{ !cancelled() }} + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat > "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/merge_queue.yml b/.github/workflows/merge_queue.yml index 31a65ac3d15..64083668719 100644 --- a/.github/workflows/merge_queue.yml +++ b/.github/workflows/merge_queue.yml @@ -93,7 +93,7 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} CheckReadyForMerge: - if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }} + if: ${{ !cancelled() }} # Test_2 or Test_3 must not have jobs required for Mergeable check needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Tests_1] runs-on: [self-hosted, style-checker-aarch64] @@ -101,6 +101,14 @@ jobs: - name: Check out repository code uses: ClickHouse/checkout@v1 - name: Check and set merge status + if: ${{ needs.StyleCheck.result == 'success' }} run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat > "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index bffe5b4c1bf..ea9c125db70 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -44,3 +44,17 @@ jobs: with: data: "${{ needs.RunConfig.outputs.data }}" set_latest: true + CheckWorkflow: + if: ${{ !cancelled() }} + needs: [RunConfig, BuildDockers] + runs-on: [self-hosted, style-checker-aarch64] + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat > "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 5124e4dba2c..854dff530e7 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -123,20 +123,20 @@ jobs: stage: Builds_2 data: ${{ needs.RunConfig.outputs.data }} # stage for running non-required checks without being blocked by required checks (Test_1) if corresponding settings is selected - Tests_2: + Tests_2_ww: needs: [RunConfig, Builds_1] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2_ww') }} + uses: ./.github/workflows/reusable_test_stage.yml + with: + stage: Tests_2_ww + data: ${{ needs.RunConfig.outputs.data }} + Tests_2: + needs: [RunConfig, Builds_1, Tests_1] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }} uses: ./.github/workflows/reusable_test_stage.yml with: stage: Tests_2 data: ${{ needs.RunConfig.outputs.data }} - Tests_3: - needs: [RunConfig, Builds_1, Tests_1] - if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }} - uses: ./.github/workflows/reusable_test_stage.yml - with: - stage: Tests_3 - data: ${{ needs.RunConfig.outputs.data }} ################################# Reports ################################# # Reports should run even if Builds_1/2 fail - run them separately (not in Tests_1/2/3) @@ -151,9 +151,10 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} CheckReadyForMerge: - if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }} - # Test_2 or Test_3 must not have jobs required for Mergeable check - needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1] + if: ${{ !cancelled() }} + # Test_2 or Test_3 do not have the jobs required for Mergeable check, + # however, set them as "needs" to get all checks results before the automatic merge occurs. + needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2_ww, Tests_2] runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code @@ -161,15 +162,23 @@ jobs: with: filter: tree:0 - name: Check and set merge status + if: ${{ needs.StyleCheck.result == 'success' }} run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat > "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + python3 ./tests/ci/ci_buddy.py --check-wf-status ################################# Stage Final ################################# # FinishCheck: if: ${{ !failure() && !cancelled() }} - needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3] + needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2_ww, Tests_2] runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 6a18999d74e..b79208b03a6 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -441,8 +441,9 @@ jobs: runner_type: stress-tester data: ${{ needs.RunConfig.outputs.data }} FinishCheck: - if: ${{ !failure() && !cancelled() }} + if: ${{ !cancelled() }} needs: + - RunConfig - DockerServerImage - DockerKeeperImage - Builds_Report @@ -478,9 +479,18 @@ jobs: with: clear-repository: true - name: Finish label + if: ${{ !failure() }} run: | cd "$GITHUB_WORKSPACE/tests/ci" # update mergeable check python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} # update overall ci report python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat > "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.gitmodules b/.gitmodules index 12d865307d8..7e0b4df4ad1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -372,3 +372,6 @@ [submodule "contrib/double-conversion"] path = contrib/double-conversion url = https://github.com/ClickHouse/double-conversion.git +[submodule "contrib/numactl"] + path = contrib/numactl + url = https://github.com/ClickHouse/numactl.git diff --git a/.yamllint b/.yamllint index f144e2d47b1..7fb741ec9f4 100644 --- a/.yamllint +++ b/.yamllint @@ -14,3 +14,9 @@ rules: comments: min-spaces-from-content: 1 document-start: disable + colons: disable + indentation: disable + line-length: disable + trailing-spaces: disable + truthy: disable + new-line-at-end-of-file: disable diff --git a/CHANGELOG.md b/CHANGELOG.md index e2eb65e2967..620b7c99bac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v24.7, 2024-07-30](#247)**
**[ClickHouse release v24.6, 2024-07-01](#246)**
**[ClickHouse release v24.5, 2024-05-30](#245)**
**[ClickHouse release v24.4, 2024-04-30](#244)**
@@ -9,6 +10,177 @@ # 2024 Changelog +### ClickHouse release 24.7, 2024-07-30 + +#### Backward Incompatible Change +* Forbid `CRATE MATERIALIZED VIEW ... ENGINE Replicated*MergeTree POPULATE AS SELECT ...` with Replicated databases. [#63963](https://github.com/ClickHouse/ClickHouse/pull/63963) ([vdimir](https://github.com/vdimir)). +* `clickhouse-keeper-client` will only accept paths in string literals, such as `ls '/hello/world'`, not bare strings such as `ls /hello/world`. [#65494](https://github.com/ClickHouse/ClickHouse/pull/65494) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Metric `KeeperOutstandingRequets` was renamed to `KeeperOutstandingRequests`. [#66206](https://github.com/ClickHouse/ClickHouse/pull/66206) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove `is_deterministic` field from the `system.functions` table. [#66630](https://github.com/ClickHouse/ClickHouse/pull/66630) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Function `tuple` will now try to construct named tuples in query (controlled by `enable_named_columns_in_function_tuple`). Introduce function `tupleNames` to extract names from tuples. [#54881](https://github.com/ClickHouse/ClickHouse/pull/54881) ([Amos Bird](https://github.com/amosbird)). + +#### New Feature +* Add `ASOF JOIN` support for `full_sorting_join` algorithm. [#55051](https://github.com/ClickHouse/ClickHouse/pull/55051) ([vdimir](https://github.com/vdimir)). +* Add new window function `percent_rank`. [#62747](https://github.com/ClickHouse/ClickHouse/pull/62747) ([lgbo](https://github.com/lgbo-ustc)). +* Support JWT authentication in `clickhouse-client` (will be available only in ClickHouse Cloud). [#62829](https://github.com/ClickHouse/ClickHouse/pull/62829) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Add SQL functions `changeYear`, `changeMonth`, `changeDay`, `changeHour`, `changeMinute`, `changeSecond`. For example, `SELECT changeMonth(toDate('2024-06-14'), 7)` returns date `2024-07-14`. [#63186](https://github.com/ClickHouse/ClickHouse/pull/63186) ([cucumber95](https://github.com/cucumber95)). +* Introduce startup scripts, which allow the execution of preconfigured queries at the startup stage. [#64889](https://github.com/ClickHouse/ClickHouse/pull/64889) ([pufit](https://github.com/pufit)). +* Support accept_invalid_certificate in client's config in order to allow for client to connect over secure TCP to a server running with self-signed certificate - can be used as a shorthand for corresponding `openSSL` client settings `verificationMode=none` + `invalidCertificateHandler.name=AcceptCertificateHandler`. [#65238](https://github.com/ClickHouse/ClickHouse/pull/65238) ([peacewalker122](https://github.com/peacewalker122)). +* Add system.error_log which contains history of error values from table system.errors, periodically flushed to disk. [#65381](https://github.com/ClickHouse/ClickHouse/pull/65381) ([Pablo Marcos](https://github.com/pamarcos)). +* Add aggregate function `groupConcat`. About the same as `arrayStringConcat( groupArray(column), ',')` Can receive 2 parameters: a string delimiter and the number of elements to be processed. [#65451](https://github.com/ClickHouse/ClickHouse/pull/65451) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Add AzureQueue storage. [#65458](https://github.com/ClickHouse/ClickHouse/pull/65458) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add a new setting to disable/enable writing page index into parquet files. [#65475](https://github.com/ClickHouse/ClickHouse/pull/65475) ([lgbo](https://github.com/lgbo-ustc)). +* Introduce `logger.console_log_level` server config to control the log level to the console (if enabled). [#65559](https://github.com/ClickHouse/ClickHouse/pull/65559) ([Azat Khuzhin](https://github.com/azat)). +* Automatically append a wildcard `*` to the end of a directory path with table function `file`. [#66019](https://github.com/ClickHouse/ClickHouse/pull/66019) ([Zhidong (David) Guo](https://github.com/Gun9niR)). +* Add `--memory-usage` option to client in non interactive mode. [#66393](https://github.com/ClickHouse/ClickHouse/pull/66393) ([vdimir](https://github.com/vdimir)). +* Make an interactive client for clickhouse-disks, add local disk from the local directory. [#64446](https://github.com/ClickHouse/ClickHouse/pull/64446) ([Daniil Ivanik](https://github.com/divanik)). +* When lightweight delete happens on a table with projection(s), users have choices either throw an exception (by default) or drop the projection [#65594](https://github.com/ClickHouse/ClickHouse/pull/65594) ([jsc0218](https://github.com/jsc0218)). + +#### Experimental Feature +* Change binary serialization of Variant data type: add `compact` mode to avoid writing the same discriminator multiple times for granules with single variant or with only NULL values. Add MergeTree setting `use_compact_variant_discriminators_serialization` that is enabled by default. Note that Variant type is still experimental and backward-incompatible change in serialization is ok. [#62774](https://github.com/ClickHouse/ClickHouse/pull/62774) ([Kruglov Pavel](https://github.com/Avogar)). +* Support rocksdb as backend storage of keeper. [#56626](https://github.com/ClickHouse/ClickHouse/pull/56626) ([Han Fei](https://github.com/hanfei1991)). +* Refactor JSONExtract functions, support more types including experimental Dynamic type. [#66046](https://github.com/ClickHouse/ClickHouse/pull/66046) ([Kruglov Pavel](https://github.com/Avogar)). +* Support null map subcolumn for Variant and Dynamic subcolumns. [#66178](https://github.com/ClickHouse/ClickHouse/pull/66178) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix reading dynamic subcolumns from altered Memory table. Previously if `max_types` parameter of a Dynamic type was changed in Memory table via alter, further subcolumns reading can return wrong result. [#66066](https://github.com/ClickHouse/ClickHouse/pull/66066) ([Kruglov Pavel](https://github.com/Avogar)). +* Add support for `cluster_for_parallel_replicas` when using custom key parallel replicas. It allows you to use parallel replicas with custom key with MergeTree tables. [#65453](https://github.com/ClickHouse/ClickHouse/pull/65453) ([Antonio Andelic](https://github.com/antonio2368)). + +#### Performance Improvement +* Replace int to string algorithm with a faster one (from a modified amdn/itoa to a modified jeaiii/itoa). [#61661](https://github.com/ClickHouse/ClickHouse/pull/61661) ([Raúl Marín](https://github.com/Algunenano)). +* Sizes of hash tables created by join (`parallel_hash` algorithm) is collected and cached now. This information will be used to preallocate space in hash tables for subsequent query executions and save time on hash table resizes. [#64553](https://github.com/ClickHouse/ClickHouse/pull/64553) ([Nikita Taranov](https://github.com/nickitat)). +* Optimized queries with `ORDER BY` primary key and `WHERE` that have a condition with high selectivity by using of buffering. It is controlled by setting `read_in_order_use_buffering` (enabled by default) and can increase memory usage of query. [#64607](https://github.com/ClickHouse/ClickHouse/pull/64607) ([Anton Popov](https://github.com/CurtizJ)). +* Improve performance of loading `plain_rewritable` metadata. [#65634](https://github.com/ClickHouse/ClickHouse/pull/65634) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Attaching tables on read-only disks will use fewer resources by not loading outdated parts. [#65635](https://github.com/ClickHouse/ClickHouse/pull/65635) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support minmax hyperrectangle for Set indices. [#65676](https://github.com/ClickHouse/ClickHouse/pull/65676) ([AntiTopQuark](https://github.com/AntiTopQuark)). +* Unload primary index of outdated parts to reduce total memory usage. [#65852](https://github.com/ClickHouse/ClickHouse/pull/65852) ([Anton Popov](https://github.com/CurtizJ)). +* Functions `replaceRegexpAll` and `replaceRegexpOne` are now significantly faster if the pattern is trivial, i.e. contains no metacharacters, pattern classes, flags, grouping characters etc. (Thanks to Taiyang Li). [#66185](https://github.com/ClickHouse/ClickHouse/pull/66185) ([Robert Schulze](https://github.com/rschu1ze)). +* s3 requests: Reduce retry time for queries, increase retries count for backups. 8.5 minutes and 100 retires for queries, 1.2 hours and 1000 retries for backup restore. [#65232](https://github.com/ClickHouse/ClickHouse/pull/65232) ([Sema Checherinda](https://github.com/CheSema)). +* Support query plan LIMIT optimization. Support LIMIT pushdown for PostgreSQL storage and table function. [#65454](https://github.com/ClickHouse/ClickHouse/pull/65454) ([Maksim Kita](https://github.com/kitaisreal)). +* Improved ZooKeeper load balancing. The current session doesn't expire until the optimal nodes become available despite `fallback_session_lifetime`. Added support for AZ-aware balancing. [#65570](https://github.com/ClickHouse/ClickHouse/pull/65570) ([Alexander Tokmakov](https://github.com/tavplubix)). +* DatabaseCatalog drops tables faster by using up to database_catalog_drop_table_concurrency threads. [#66065](https://github.com/ClickHouse/ClickHouse/pull/66065) ([Sema Checherinda](https://github.com/CheSema)). + +#### Improvement +* The setting `optimize_trivial_insert_select` is disabled by default. In most cases, it should be beneficial. Nevertheless, if you are seeing slower INSERT SELECT or increased memory usage, you can enable it back or `SET compatibility = '24.6'`. [#58970](https://github.com/ClickHouse/ClickHouse/pull/58970) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Print stacktrace and diagnostic info if `clickhouse-client` or `clickhouse-local` crashes. [#61109](https://github.com/ClickHouse/ClickHouse/pull/61109) ([Alexander Tokmakov](https://github.com/tavplubix)). +* The result of `SHOW INDEX | INDEXES | INDICES | KEYS` was previously sorted by the primary key column names. Since this was unintuitive, the result is now sorted by the position of the primary key columns within the primary key. [#61131](https://github.com/ClickHouse/ClickHouse/pull/61131) ([Robert Schulze](https://github.com/rschu1ze)). +* Change how deduplication for Materialized Views works. Fixed a lot of cases like: - on destination table: data is split for 2 or more blocks and that blocks is considered as duplicate when that block is inserted in parallel. - on MV destination table: the equal blocks are deduplicated, that happens when MV often produces equal data as a result for different input data due to performing aggregation. - on MV destination table: the equal blocks which comes from different MV are deduplicated. [#61601](https://github.com/ClickHouse/ClickHouse/pull/61601) ([Sema Checherinda](https://github.com/CheSema)). +* Allow matching column names in a case insensitive manner when reading json files (`input_format_json_case_insensitive_column_matching`). [#61750](https://github.com/ClickHouse/ClickHouse/pull/61750) ([kevinyhzou](https://github.com/KevinyhZou)). +* Support reading partitioned data DeltaLake data. Infer DeltaLake schema by reading metadata instead of data. [#63201](https://github.com/ClickHouse/ClickHouse/pull/63201) ([Kseniia Sumarokova](https://github.com/kssenii)). +* In composable protocols TLS layer accepted only `certificateFile` and `privateKeyFile` parameters. https://clickhouse.com/docs/en/operations/settings/composable-protocols. [#63985](https://github.com/ClickHouse/ClickHouse/pull/63985) ([Anton Ivashkin](https://github.com/ianton-ru)). +* Added profile event `SelectQueriesWithPrimaryKeyUsage` which indicates how many SELECT queries use the primary key to evaluate the WHERE clause. [#64492](https://github.com/ClickHouse/ClickHouse/pull/64492) ([0x01f](https://github.com/0xfei)). +* `StorageS3Queue` related fixes and improvements. Deduce a default value of `s3queue_processing_threads_num` according to the number of physical cpu cores on the server (instead of the previous default value as 1). Set default value of `s3queue_loading_retries` to 10. Fix possible vague "Uncaught exception" in exception column of `system.s3queue`. Do not increment retry count on `MEMORY_LIMIT_EXCEEDED` exception. Move files commit to a stage after insertion into table fully finished to avoid files being commited while not inserted. Add settings `s3queue_max_processed_files_before_commit`, `s3queue_max_processed_rows_before_commit`, `s3queue_max_processed_bytes_before_commit`, `s3queue_max_processing_time_sec_before_commit`, to better control commit and flush time. [#65046](https://github.com/ClickHouse/ClickHouse/pull/65046) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support aliases in parametrized view function (only new analyzer). [#65190](https://github.com/ClickHouse/ClickHouse/pull/65190) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Updated to mask account key in logs in azureBlobStorage. [#65273](https://github.com/ClickHouse/ClickHouse/pull/65273) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Partition pruning for `IN` predicates when filter expression is a part of `PARTITION BY` expression. [#65335](https://github.com/ClickHouse/ClickHouse/pull/65335) ([Eduard Karacharov](https://github.com/korowa)). +* Add system tables with main information about all detached tables. [#65400](https://github.com/ClickHouse/ClickHouse/pull/65400) ([Konstantin Morozov](https://github.com/k-morozov)). +* `arrayMin`/`arrayMax` can be applicable to all data types that are comparable. [#65455](https://github.com/ClickHouse/ClickHouse/pull/65455) ([pn](https://github.com/chloro-pn)). +* Improved memory accounting for cgroups v2 to exclude the amount occupied by the page cache. [#65470](https://github.com/ClickHouse/ClickHouse/pull/65470) ([Nikita Taranov](https://github.com/nickitat)). +* Do not create format settings for each row when serializing chunks to insert to EmbeddedRocksDB table. [#65474](https://github.com/ClickHouse/ClickHouse/pull/65474) ([Duc Canh Le](https://github.com/canhld94)). +* Reduce `clickhouse-local` prompt to just `:)`. `getFQDNOrHostName()` takes too long on macOS, and we don't want a hostname in the prompt for `clickhouse-local` anyway. [#65510](https://github.com/ClickHouse/ClickHouse/pull/65510) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Avoid printing a message from jemalloc about per-CPU arenas on low-end virtual machines. [#65532](https://github.com/ClickHouse/ClickHouse/pull/65532) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable filesystem cache background download by default. It will be enabled back when we fix the issue with possible "Memory limit exceeded" because memory deallocation is done outside of query context (while buffer is allocated inside of query context) if we use background download threads. Plus we need to add a separate setting to define max size to download for background workers (currently it is limited by max_file_segment_size, which might be too big). [#65534](https://github.com/ClickHouse/ClickHouse/pull/65534) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add new option to config `` which allow to specify how often clickhouse will reload config. [#65545](https://github.com/ClickHouse/ClickHouse/pull/65545) ([alesapin](https://github.com/alesapin)). +* Implement binary encoding for ClickHouse data types and add its specification in docs. Use it in Dynamic binary serialization, allow to use it in RowBinaryWithNamesAndTypes and Native formats under settings. [#65546](https://github.com/ClickHouse/ClickHouse/pull/65546) ([Kruglov Pavel](https://github.com/Avogar)). +* Improved ZooKeeper load balancing. The current session doesn't expire until the optimal nodes become available despite `fallback_session_lifetime`. Added support for AZ-aware balancing. [#65570](https://github.com/ClickHouse/ClickHouse/pull/65570) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Server settings `compiled_expression_cache_size` and `compiled_expression_cache_elements_size` are now shown in `system.server_settings`. [#65584](https://github.com/ClickHouse/ClickHouse/pull/65584) ([Robert Schulze](https://github.com/rschu1ze)). +* Add support for user identification based on x509 SubjectAltName extension. [#65626](https://github.com/ClickHouse/ClickHouse/pull/65626) ([Anton Kozlov](https://github.com/tonickkozlov)). +* `clickhouse-local` will respect the `max_server_memory_usage` and `max_server_memory_usage_to_ram_ratio` from the configuration file. It will also set the max memory usage to 90% of the system memory by default, like `clickhouse-server` does. [#65697](https://github.com/ClickHouse/ClickHouse/pull/65697) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a script to backup your files to ClickHouse. [#65699](https://github.com/ClickHouse/ClickHouse/pull/65699) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* PostgreSQL source support cancel. [#65722](https://github.com/ClickHouse/ClickHouse/pull/65722) ([Maksim Kita](https://github.com/kitaisreal)). +* Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)). +* Allow to use `concat` function with empty arguments ``` sql :) select concat();. [#65887](https://github.com/ClickHouse/ClickHouse/pull/65887) ([李扬](https://github.com/taiyang-li)). +* Allow controlling named collections in clickhouse-local. [#65973](https://github.com/ClickHouse/ClickHouse/pull/65973) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve Azure profile events. [#65999](https://github.com/ClickHouse/ClickHouse/pull/65999) ([alesapin](https://github.com/alesapin)). +* Support ORC file read by writer time zone. [#66025](https://github.com/ClickHouse/ClickHouse/pull/66025) ([kevinyhzou](https://github.com/KevinyhZou)). +* Add settings to control connection to the PostgreSQL. * Setting `postgresql_connection_attempt_timeout` specifies the value passed to `connect_timeout` parameter of connection URL. * Setting `postgresql_connection_pool_retries` specifies the number of retries to establish a connection to the PostgreSQL end-point. [#66232](https://github.com/ClickHouse/ClickHouse/pull/66232) ([Dmitry Novik](https://github.com/novikd)). +* Reduce inaccuracy of input_wait_elapsed_us/input_wait_elapsed_us/elapsed_us. [#66239](https://github.com/ClickHouse/ClickHouse/pull/66239) ([Azat Khuzhin](https://github.com/azat)). +* Improve FilesystemCache ProfileEvents. [#66249](https://github.com/ClickHouse/ClickHouse/pull/66249) ([zhukai](https://github.com/nauu)). +* Add settings to ignore ON CLUSTER clause in queries for named collection management with replicated storage. [#66288](https://github.com/ClickHouse/ClickHouse/pull/66288) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Function `generateSnowflakeID` now allows to specify a machine ID as a parameter to prevent collisions in large clusters. [#66374](https://github.com/ClickHouse/ClickHouse/pull/66374) ([ZAWA_ll](https://github.com/Zawa-ll)). +* Disable suspending on Ctrl+Z in interactive mode. This is a common trap and is not expected behavior for almost all users. I imagine only a few extreme power users could appreciate suspending terminal applications to the background, but I don't know any. [#66511](https://github.com/ClickHouse/ClickHouse/pull/66511) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add option for validating the Primary key type in Dictionaries. Without this option for simple layouts any column type will be implicitly converted to UInt64. ### Documentation entry for user-facing changes. [#66595](https://github.com/ClickHouse/ClickHouse/pull/66595) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Fix unexpected size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)). +* Check cyclic dependencies on CREATE/REPLACE/RENAME/EXCHANGE queries and throw an exception if there is a cyclic dependency. Previously such cyclic dependencies could lead to a deadlock during server startup. Also fix some bugs in dependencies creation. [#65405](https://github.com/ClickHouse/ClickHouse/pull/65405) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix crash in maxIntersections. [#65689](https://github.com/ClickHouse/ClickHouse/pull/65689) ([Raúl Marín](https://github.com/Algunenano)). +* Fix the VALID UNTIL clause in the user definition resetting after a restart. [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix SHOW MERGES remaining time. [#66735](https://github.com/ClickHouse/ClickHouse/pull/66735) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `Query was cancelled` might have been printed twice in clickhouse-client. This behaviour is fixed. [#66005](https://github.com/ClickHouse/ClickHouse/pull/66005) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fixed crash while using MaterializedMySQL with TABLE OVERRIDE that maps MySQL NULL field into ClickHouse not NULL field. [#54649](https://github.com/ClickHouse/ClickHouse/pull/54649) ([Filipp Ozinov](https://github.com/bakwc)). +* Fix logical error when PREWHERE expression read no columns and table has no adaptive index granularity (very old table). [#59173](https://github.com/ClickHouse/ClickHouse/pull/59173) ([Alexander Gololobov](https://github.com/davenger)). +* Fix bug with cancellation buffer when canceling a query. [#64478](https://github.com/ClickHouse/ClickHouse/pull/64478) ([Sema Checherinda](https://github.com/CheSema)). +* Fix filling parts columns from metadata (when columns.txt does not exists). [#64757](https://github.com/ClickHouse/ClickHouse/pull/64757) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash for `ALTER TABLE ... ON CLUSTER ... MODIFY SQL SECURITY`. [#64957](https://github.com/ClickHouse/ClickHouse/pull/64957) ([pufit](https://github.com/pufit)). +* Fix crash on destroying AccessControl: add explicit shutdown. [#64993](https://github.com/ClickHouse/ClickHouse/pull/64993) ([Vitaly Baranov](https://github.com/vitlibar)). +* Eliminate injective function in argument of functions `uniq*` recursively. This used to work correctly but was broken in the new analyzer. [#65140](https://github.com/ClickHouse/ClickHouse/pull/65140) ([Duc Canh Le](https://github.com/canhld94)). +* Fix unexpected projection name when query with CTE. [#65267](https://github.com/ClickHouse/ClickHouse/pull/65267) ([wudidapaopao](https://github.com/wudidapaopao)). +* Require `dictGet` privilege when accessing dictionaries via direct query or the `Dictionary` table engine. [#65359](https://github.com/ClickHouse/ClickHouse/pull/65359) ([Joe Lynch](https://github.com/joelynch)). +* Fix user-specific S3 auth with incremental backups. [#65481](https://github.com/ClickHouse/ClickHouse/pull/65481) ([Antonio Andelic](https://github.com/antonio2368)). +* Disable `non-intersecting-parts` optimization for queries with `FINAL` in case of `read-in-order` optimization was enabled. This could lead to an incorrect query result. As a workaround, disable `do_not_merge_across_partitions_select_final` and `split_parts_ranges_into_intersecting_and_non_intersecting_final` before this fix is merged. [#65505](https://github.com/ClickHouse/ClickHouse/pull/65505) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix getting exception `Index out of bound for blob metadata` in case all files from list batch were filtered out. [#65523](https://github.com/ClickHouse/ClickHouse/pull/65523) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix NOT_FOUND_COLUMN_IN_BLOCK for deduplicate merge of projection. [#65573](https://github.com/ClickHouse/ClickHouse/pull/65573) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fixed bug in MergeJoin. Column in sparse serialisation might be treated as a column of its nested type though the required conversion wasn't performed. [#65632](https://github.com/ClickHouse/ClickHouse/pull/65632) ([Nikita Taranov](https://github.com/nickitat)). +* Fixed a bug that compatibility level '23.4' was not properly applied. [#65737](https://github.com/ClickHouse/ClickHouse/pull/65737) ([cw5121](https://github.com/cw5121)). +* Fix odbc table with nullable fields. [#65738](https://github.com/ClickHouse/ClickHouse/pull/65738) ([Rodolphe Dugé de Bernonville](https://github.com/RodolpheDuge)). +* Fix data race in `TCPHandler`, which could happen on fatal error. [#65744](https://github.com/ClickHouse/ClickHouse/pull/65744) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix invalid exceptions in function `parseDateTime` with `%F` and `%D` placeholders. [#65768](https://github.com/ClickHouse/ClickHouse/pull/65768) ([Antonio Andelic](https://github.com/antonio2368)). +* For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix a bug in short circuit logic when old analyzer and dictGetOrDefault is used. [#65802](https://github.com/ClickHouse/ClickHouse/pull/65802) ([jsc0218](https://github.com/jsc0218)). +* Fix a bug leads to EmbeddedRocksDB with TTL write corrupted SST files. [#65816](https://github.com/ClickHouse/ClickHouse/pull/65816) ([Duc Canh Le](https://github.com/canhld94)). +* Functions `bitTest`, `bitTestAll`, and `bitTestAny` now return an error if the specified bit index is out-of-bounds [#65818](https://github.com/ClickHouse/ClickHouse/pull/65818) ([Pablo Marcos](https://github.com/pamarcos)). +* Setting `join_any_take_last_row` is supported in any query with hash join. [#65820](https://github.com/ClickHouse/ClickHouse/pull/65820) ([vdimir](https://github.com/vdimir)). +* Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when condition other then `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)). +* Functions `bitShiftLeft` and `bitShitfRight` return an error for out of bounds shift positions [#65838](https://github.com/ClickHouse/ClickHouse/pull/65838) ([Pablo Marcos](https://github.com/pamarcos)). +* Fix growing memory usage in S3Queue. [#65839](https://github.com/ClickHouse/ClickHouse/pull/65839) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix tie handling in `arrayAUC` to match sklearn. [#65840](https://github.com/ClickHouse/ClickHouse/pull/65840) ([gabrielmcg44](https://github.com/gabrielmcg44)). +* Fix possible issues with MySQL server protocol TLS connections. [#65917](https://github.com/ClickHouse/ClickHouse/pull/65917) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible issues with MySQL client protocol TLS connections. [#65938](https://github.com/ClickHouse/ClickHouse/pull/65938) ([Azat Khuzhin](https://github.com/azat)). +* Fix handling of `SSL_ERROR_WANT_READ`/`SSL_ERROR_WANT_WRITE` with zero timeout. [#65941](https://github.com/ClickHouse/ClickHouse/pull/65941) ([Azat Khuzhin](https://github.com/azat)). +* Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. It prevents from incorrect result of schema inference with these settings changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)). +* Column _size in s3 engine and s3 table function denotes the size of a file inside the archive, not a size of the archive itself. [#65993](https://github.com/ClickHouse/ClickHouse/pull/65993) ([Daniil Ivanik](https://github.com/divanik)). +* Fix resolving dynamic subcolumns in analyzer, avoid reading the whole column on dynamic subcolumn reading. [#66004](https://github.com/ClickHouse/ClickHouse/pull/66004) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix config merging for from_env with replace overrides. [#66034](https://github.com/ClickHouse/ClickHouse/pull/66034) ([Azat Khuzhin](https://github.com/azat)). +* Fix a possible hanging in `GRPCServer` during shutdown. [#66061](https://github.com/ClickHouse/ClickHouse/pull/66061) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fixed several cases in function `has` with non-constant `LowCardinality` arguments. [#66088](https://github.com/ClickHouse/ClickHouse/pull/66088) ([Anton Popov](https://github.com/CurtizJ)). +* Fix for `groupArrayIntersect`. It had incorrect behavior in the `merge()` function. Also, fixed behavior in `deserialise()` for numeric and general data. [#66103](https://github.com/ClickHouse/ClickHouse/pull/66103) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)). +* Disable the `merge-filters` optimization introduced in [#64760](https://github.com/ClickHouse/ClickHouse/issues/64760). It may cause an exception if optimization merges two filter expressions and does not apply a short-circuit evaluation. [#66126](https://github.com/ClickHouse/ClickHouse/pull/66126) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed the issue when the server failed to parse Avro files with negative block size arrays encoded, which is now allowed by the Avro specification. [#66130](https://github.com/ClickHouse/ClickHouse/pull/66130) ([Serge Klochkov](https://github.com/slvrtrn)). +* Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)). +* Fix rare case with missing data in the result of distributed query. [#66174](https://github.com/ClickHouse/ClickHouse/pull/66174) ([vdimir](https://github.com/vdimir)). +* Fix order of parsing metadata fields in StorageDeltaLake. [#66211](https://github.com/ClickHouse/ClickHouse/pull/66211) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Don't throw `TIMEOUT_EXCEEDED` for `none_only_active` mode of `distributed_ddl_output_mode`. [#66218](https://github.com/ClickHouse/ClickHouse/pull/66218) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix handling limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)). +* Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix error reporting while copying to S3 or AzureBlobStorage. [#66295](https://github.com/ClickHouse/ClickHouse/pull/66295) ([Vitaly Baranov](https://github.com/vitlibar)). +* Prevent watchdog from keeping descriptors of unlinked(rotated) log files. [#66334](https://github.com/ClickHouse/ClickHouse/pull/66334) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Fix the bug that logicalexpressionoptimizerpass lost logical type of constant. [#66344](https://github.com/ClickHouse/ClickHouse/pull/66344) ([pn](https://github.com/chloro-pn)). +* Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible incorrect result for queries joining and filtering table external engine (like PostgreSQL), due to too aggressive filter pushdown. Since now, conditions from where section won't be send to external database in case of outer join with external table. [#66402](https://github.com/ClickHouse/ClickHouse/pull/66402) ([vdimir](https://github.com/vdimir)). +* Added missing column materialization for cross join. [#66413](https://github.com/ClickHouse/ClickHouse/pull/66413) ([lgbo](https://github.com/lgbo-ustc)). +* Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Avoid possible logical error during import from Npy format in case of bad array nesting level, fix testing of other kinds of errors. [#66461](https://github.com/ClickHouse/ClickHouse/pull/66461) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix wrong count() result when there is non-deterministic function in predicate. [#66510](https://github.com/ClickHouse/ClickHouse/pull/66510) ([Duc Canh Le](https://github.com/canhld94)). +* Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix reading of uninitialized memory when hashing empty tuples. [#66562](https://github.com/ClickHouse/ClickHouse/pull/66562) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix removing named collections in local storage. [#66599](https://github.com/ClickHouse/ClickHouse/pull/66599) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix `column_length` is not updated in `ColumnTuple::insertManyFrom`. [#66626](https://github.com/ClickHouse/ClickHouse/pull/66626) ([lgbo](https://github.com/lgbo-ustc)). +* Fix `Unknown identifier` and `Column is not under aggregate function` errors for queries with the expression `(column IS NULL).` The bug was triggered by [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088), with the disabled analyzer only. [#66654](https://github.com/ClickHouse/ClickHouse/pull/66654) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix rare case of stuck merge after drop column. [#66707](https://github.com/ClickHouse/ClickHouse/pull/66707) ([Raúl Marín](https://github.com/Algunenano)). +* Fix assertion `isUniqTypes` when insert select from remote sources. [#66722](https://github.com/ClickHouse/ClickHouse/pull/66722) ([Sema Checherinda](https://github.com/CheSema)). +* Fix logical error in PrometheusRequestHandler. [#66621](https://github.com/ClickHouse/ClickHouse/pull/66621) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix `indexHint` function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)). +* Fix AST formatting of 'create table b empty as a'. [#64951](https://github.com/ClickHouse/ClickHouse/pull/64951) ([Michael Kolupaev](https://github.com/al13n321)). + +#### Build/Testing/Packaging Improvement +* Instantiate template methods ahead in different .cpp files, avoid too large translation units during compiling. [#64818](https://github.com/ClickHouse/ClickHouse/pull/64818) ([lgbo](https://github.com/lgbo-ustc)). + ### ClickHouse release 24.6, 2024-07-01 #### Backward Incompatible Change diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 159502c9735..341c92d3042 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -32,6 +32,7 @@ set (SRCS StringRef.cpp safeExit.cpp throwError.cpp + Numa.cpp ) add_library (common ${SRCS}) @@ -46,6 +47,10 @@ if (TARGET ch_contrib::crc32_s390x) target_link_libraries(common PUBLIC ch_contrib::crc32_s390x) endif() +if (TARGET ch_contrib::numactl) + target_link_libraries(common PUBLIC ch_contrib::numactl) +endif() + target_include_directories(common PUBLIC .. "${CMAKE_CURRENT_BINARY_DIR}/..") target_link_libraries (common diff --git a/base/base/Numa.cpp b/base/base/Numa.cpp new file mode 100644 index 00000000000..0bf56a993b8 --- /dev/null +++ b/base/base/Numa.cpp @@ -0,0 +1,37 @@ +#include + +#include "config.h" + +#if USE_NUMACTL +# include +#endif + +namespace DB +{ + +std::optional getNumaNodesTotalMemory() +{ + std::optional total_memory; +#if USE_NUMACTL + if (numa_available() != -1) + { + auto * membind = numa_get_membind(); + if (!numa_bitmask_equal(membind, numa_all_nodes_ptr)) + { + total_memory.emplace(0); + auto max_node = numa_max_node(); + for (int i = 0; i <= max_node; ++i) + { + if (numa_bitmask_isbitset(membind, i)) + *total_memory += numa_node_size(i, nullptr); + } + } + + numa_bitmask_free(membind); + } + +#endif + return total_memory; +} + +} diff --git a/base/base/Numa.h b/base/base/Numa.h new file mode 100644 index 00000000000..b48ab15766b --- /dev/null +++ b/base/base/Numa.h @@ -0,0 +1,12 @@ +#pragma once + +#include + +namespace DB +{ + +/// return total memory of NUMA nodes the process is bound to +/// if NUMA is not supported or process can use all nodes, std::nullopt is returned +std::optional getNumaNodesTotalMemory(); + +} diff --git a/base/base/defines.h b/base/base/defines.h index 2fc54c37bde..5685a6d9833 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -87,10 +87,13 @@ # define ASAN_POISON_MEMORY_REGION(a, b) #endif -#if !defined(ABORT_ON_LOGICAL_ERROR) - #if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER) || defined(UNDEFINED_BEHAVIOR_SANITIZER) - #define ABORT_ON_LOGICAL_ERROR - #endif +/// We used to have only ABORT_ON_LOGICAL_ERROR macro, but most of its uses were actually in places where we didn't care about logical errors +/// but wanted to check exactly if the current build type is debug or with sanitizer. This new macro is introduced to fix those places. +#if !defined(DEBUG_OR_SANITIZER_BUILD) +# if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER) \ + || defined(UNDEFINED_BEHAVIOR_SANITIZER) +# define DEBUG_OR_SANITIZER_BUILD +# endif #endif /// chassert(x) is similar to assert(x), but: @@ -101,7 +104,7 @@ /// Also it makes sense to call abort() instead of __builtin_unreachable() in debug builds, /// because SIGABRT is easier to debug than SIGTRAP (the second one makes gdb crazy) #if !defined(chassert) - #if defined(ABORT_ON_LOGICAL_ERROR) +# if defined(DEBUG_OR_SANITIZER_BUILD) // clang-format off #include namespace DB diff --git a/base/base/getMemoryAmount.cpp b/base/base/getMemoryAmount.cpp index afdb6ba068a..03aab1eac72 100644 --- a/base/base/getMemoryAmount.cpp +++ b/base/base/getMemoryAmount.cpp @@ -2,15 +2,14 @@ #include #include +#include #include -#include #include #include #include - namespace { @@ -63,6 +62,9 @@ uint64_t getMemoryAmountOrZero() uint64_t memory_amount = num_pages * page_size; + if (auto total_numa_memory = DB::getNumaNodesTotalMemory(); total_numa_memory.has_value()) + memory_amount = *total_numa_memory; + /// Respect the memory limit set by cgroups v2. auto limit_v2 = getCgroupsV2MemoryLimit(); if (limit_v2.has_value() && *limit_v2 < memory_amount) diff --git a/base/glibc-compatibility/CMakeLists.txt b/base/glibc-compatibility/CMakeLists.txt index c967fa5b11b..8948e25cb8e 100644 --- a/base/glibc-compatibility/CMakeLists.txt +++ b/base/glibc-compatibility/CMakeLists.txt @@ -18,6 +18,16 @@ if (GLIBC_COMPATIBILITY) message (FATAL_ERROR "glibc_compatibility can only be used on x86_64 or aarch64.") endif () + if (SANITIZE STREQUAL thread) + # Disable TSAN instrumentation that conflicts with re-exec due to high ASLR entropy using getauxval + # See longer comment in __auxv_init_procfs + # In the case of tsan we need to make sure getauxval is not instrumented as that would introduce tsan + # internal calls to functions that depend on a state that isn't initialized yet + set_source_files_properties( + musl/getauxval.c + PROPERTIES COMPILE_FLAGS "-mllvm -tsan-instrument-func-entry-exit=false") + endif() + # Need to omit frame pointers to match the performance of glibc set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer") diff --git a/base/glibc-compatibility/musl/getauxval.c b/base/glibc-compatibility/musl/getauxval.c index ea5cff9fc11..ec2cce1e4aa 100644 --- a/base/glibc-compatibility/musl/getauxval.c +++ b/base/glibc-compatibility/musl/getauxval.c @@ -75,6 +75,44 @@ unsigned long NO_SANITIZE_THREAD __getauxval_procfs(unsigned long type) } static unsigned long NO_SANITIZE_THREAD __auxv_init_procfs(unsigned long type) { +#if defined(__x86_64__) && defined(__has_feature) +# if __has_feature(memory_sanitizer) || __has_feature(thread_sanitizer) + /// Sanitizers are not compatible with high ASLR entropy, which is the default on modern Linux distributions, and + /// to workaround this limitation, TSAN and MSAN (couldn't see other sanitizers doing the same), re-exec the binary + /// without ASLR (see https://github.com/llvm/llvm-project/commit/0784b1eefa36d4acbb0dacd2d18796e26313b6c5) + + /// The problem we face is that, in order to re-exec, the sanitizer wants to use the original pathname in the call + /// and to get its value it uses getauxval (https://github.com/llvm/llvm-project/blob/20eff684203287828d6722fc860b9d3621429542/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp#L985-L988). + /// Since we provide getauxval ourselves (to minimize the version dependency on runtime glibc), we are the ones + // being called and we fail horribly: + /// + /// ==301455==ERROR: MemorySanitizer: SEGV on unknown address 0x2ffc6d721550 (pc 0x5622c1cc0073 bp 0x000000000003 sp 0x7ffc6d721530 T301455) + /// ==301455==The signal is caused by a WRITE memory access. + /// #0 0x5622c1cc0073 in __auxv_init_procfs ./ClickHouse/base/glibc-compatibility/musl/getauxval.c:129:5 + /// #1 0x5622c1cbffe9 in getauxval ./ClickHouse/base/glibc-compatibility/musl/getauxval.c:240:12 + /// #2 0x5622c0d7bfb4 in __sanitizer::ReExec() crtstuff.c + /// #3 0x5622c0df7bfc in __msan::InitShadowWithReExec(bool) crtstuff.c + /// #4 0x5622c0d95356 in __msan_init (./ClickHouse/build_msan/contrib/google-protobuf-cmake/protoc+0x256356) (BuildId: 6411d3c88b898ba3f7d49760555977d3e61f0741) + /// #5 0x5622c0dfe878 in msan.module_ctor main.cc + /// #6 0x5622c1cc156c in __libc_csu_init (./ClickHouse/build_msan/contrib/google-protobuf-cmake/protoc+0x118256c) (BuildId: 6411d3c88b898ba3f7d49760555977d3e61f0741) + /// #7 0x73dc05dd7ea3 in __libc_start_main /usr/src/debug/glibc/glibc/csu/../csu/libc-start.c:343:6 + /// #8 0x5622c0d6b7cd in _start (./ClickHouse/build_msan/contrib/google-protobuf-cmake/protoc+0x22c7cd) (BuildId: 6411d3c88b898ba3f7d49760555977d3e61f0741) + + /// The source of the issue above is that, at this point in time during __msan_init, we can't really do much as + /// most global variables aren't initialized or available yet, so we can't initiate the auxiliary vector. + /// Normal glibc / musl getauxval doesn't have this problem since they initiate their auxval vector at the very + /// start of __libc_start_main (just keeping track of argv+argc+1), but we don't have such option (otherwise + /// this complexity of reading "/proc/self/auxv" or using __environ would not be necessary). + + /// To avoid this crashes on the re-exec call (see above how it would fail when creating `aux`, and if we used + /// __auxv_init_environ then it would SIGSEV on READing `__environ`) we capture this call for `AT_EXECFN` and + /// unconditionally return "/proc/self/exe" without any preparation. Theoretically this should be fine in + /// our case, as we don't load any libraries. That's the theory at least. + if (type == AT_EXECFN) + return (unsigned long)"/proc/self/exe"; +# endif +#endif + // For debugging: // - od -t dL /proc/self/auxv // - LD_SHOW_AUX= ls @@ -199,7 +237,7 @@ static unsigned long NO_SANITIZE_THREAD __auxv_init_environ(unsigned long type) // - __auxv_init_procfs -> __auxv_init_environ -> __getauxval_environ static void * volatile getauxval_func = (void *)__auxv_init_procfs; -unsigned long getauxval(unsigned long type) +unsigned long NO_SANITIZE_THREAD getauxval(unsigned long type) { return ((unsigned long (*)(unsigned long))getauxval_func)(type); } diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index bb776fa9506..d69646d3694 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54488) +SET(VERSION_REVISION 54489) SET(VERSION_MAJOR 24) -SET(VERSION_MINOR 7) +SET(VERSION_MINOR 8) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH aa023477a9265e403982fca5ee29a714db5133d9) -SET(VERSION_DESCRIBE v24.7.1.1-testing) -SET(VERSION_STRING 24.7.1.1) +SET(VERSION_GITHASH 3f8b27d7accd2b5ec4afe7d0dd459115323304af) +SET(VERSION_DESCRIBE v24.8.1.1-testing) +SET(VERSION_STRING 24.8.1.1) # end of autochange diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 90ae5981a21..977efda15ff 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -230,6 +230,8 @@ add_contrib (libssh-cmake libssh) add_contrib (prometheus-protobufs-cmake prometheus-protobufs prometheus-protobufs-gogo) +add_contrib(numactl-cmake numactl) + # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear # in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually, diff --git a/contrib/datasketches-cpp-cmake/CMakeLists.txt b/contrib/datasketches-cpp-cmake/CMakeLists.txt index b12a88ad57b..497d6956d0e 100644 --- a/contrib/datasketches-cpp-cmake/CMakeLists.txt +++ b/contrib/datasketches-cpp-cmake/CMakeLists.txt @@ -9,6 +9,7 @@ set(DATASKETCHES_LIBRARY theta) add_library(_datasketches INTERFACE) target_include_directories(_datasketches SYSTEM BEFORE INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/common/include" + "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/count/include" "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/theta/include") add_library(ch_contrib::datasketches ALIAS _datasketches) diff --git a/contrib/icu b/contrib/icu index a56dde820dc..7750081bda4 160000 --- a/contrib/icu +++ b/contrib/icu @@ -1 +1 @@ -Subproject commit a56dde820dc35665a66f2e9ee8ba58e75049b668 +Subproject commit 7750081bda4b3bc1768ae03849ec70f67ea10625 diff --git a/contrib/icu-cmake/CMakeLists.txt b/contrib/icu-cmake/CMakeLists.txt index 0a650f2bcc0..f9d05f7fe97 100644 --- a/contrib/icu-cmake/CMakeLists.txt +++ b/contrib/icu-cmake/CMakeLists.txt @@ -4,7 +4,9 @@ else () option(ENABLE_ICU "Enable ICU" 0) endif () -if (NOT ENABLE_ICU) +# Temporarily disabled s390x because the ICU build links a blob (icudt71b_dat.S) and our friends from IBM did not explain how they generated +# the blob on s390x: https://github.com/ClickHouse/icudata/pull/2#issuecomment-2226957255 +if (NOT ENABLE_ICU OR ARCH_S390X) message(STATUS "Not using ICU") return() endif() @@ -12,8 +14,6 @@ endif() set(ICU_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/icu/icu4c/source") set(ICUDATA_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/icudata/") -set (CMAKE_CXX_STANDARD 17) - # These lists of sources were generated from build log of the original ICU build system (configure + make). set(ICUUC_SOURCES @@ -462,9 +462,9 @@ file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/empty.cpp" CONTENT " ") enable_language(ASM) if (ARCH_S390X) - set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt70b_dat.S" ) + set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt75b_dat.S" ) else() - set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt70l_dat.S" ) + set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt75l_dat.S" ) endif() set(ICUDATA_SOURCES diff --git a/contrib/icudata b/contrib/icudata index c8e717892a5..d345d6ac22f 160000 --- a/contrib/icudata +++ b/contrib/icudata @@ -1 +1 @@ -Subproject commit c8e717892a557b4d2852317c7d628aacc0a0e5ab +Subproject commit d345d6ac22f381c882420de9053d30ae1ff38d75 diff --git a/contrib/libunwind b/contrib/libunwind index 8f28e64d158..a89d904befe 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit 8f28e64d15819d2d096badd598c7d85bebddb1f2 +Subproject commit a89d904befea07814628c6ce0b44083c4e149c62 diff --git a/contrib/numactl b/contrib/numactl new file mode 160000 index 00000000000..8d13d63a05f --- /dev/null +++ b/contrib/numactl @@ -0,0 +1 @@ +Subproject commit 8d13d63a05f0c3cd88bf777cbb61541202b7da08 diff --git a/contrib/numactl-cmake/CMakeLists.txt b/contrib/numactl-cmake/CMakeLists.txt new file mode 100644 index 00000000000..a72ff11e485 --- /dev/null +++ b/contrib/numactl-cmake/CMakeLists.txt @@ -0,0 +1,30 @@ +if (NOT ( + OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64 OR ARCH_LOONGARCH64)) +) + if (ENABLE_NUMACTL) + message (${RECONFIGURE_MESSAGE_LEVEL} + "numactl is disabled implicitly because the OS or architecture is not supported. Use -DENABLE_NUMACTL=0") + endif () + set (ENABLE_NUMACTL OFF) +else() + option (ENABLE_NUMACTL "Enable numactl" ${ENABLE_LIBRARIES}) +endif() + +if (NOT ENABLE_NUMACTL) + message (STATUS "Not using numactl") + return() +endif () + +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/numactl") + +set (SRCS + "${LIBRARY_DIR}/libnuma.c" + "${LIBRARY_DIR}/syscall.c" +) + +add_library(_numactl ${SRCS}) + +target_include_directories(_numactl SYSTEM PRIVATE include) +target_include_directories(_numactl SYSTEM PUBLIC "${LIBRARY_DIR}") + +add_library(ch_contrib::numactl ALIAS _numactl) diff --git a/contrib/numactl-cmake/include/config.h b/contrib/numactl-cmake/include/config.h new file mode 100644 index 00000000000..a304db38e53 --- /dev/null +++ b/contrib/numactl-cmake/include/config.h @@ -0,0 +1,82 @@ +/* config.h. Generated from config.h.in by configure. */ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* Checking for symver attribute */ +#define HAVE_ATTRIBUTE_SYMVER 0 + +/* Define to 1 if you have the header file. */ +#define HAVE_DLFCN_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDIO_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#define LT_OBJDIR ".libs/" + +/* Name of package */ +#define PACKAGE "numactl" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "numactl" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "numactl 2.1" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "numactl" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "2.1" + +/* Define to 1 if all of the C89 standard headers exist (not just the ones + required in a freestanding environment). This macro is provided for + backward compatibility; new code need not use it. */ +#define STDC_HEADERS 1 + +/* If the compiler supports a TLS storage class define it to that here */ +#define TLS __thread + +/* Version number of package */ +#define VERSION "2.1" + +/* Number of bits in a file offset, on hosts where this is settable. */ +/* #undef _FILE_OFFSET_BITS */ + +/* Define to 1 on platforms where this makes off_t a 64-bit type. */ +/* #undef _LARGE_FILES */ + +/* Number of bits in time_t, on hosts where this is settable. */ +/* #undef _TIME_BITS */ + +/* Define to 1 on platforms where this makes time_t a 64-bit type. */ +/* #undef __MINGW_USE_VC2005_COMPAT */ diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index 2317f84e0cb..a81826ed6b5 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -23,15 +23,17 @@ RUN apt-get update \ # and MEMORY_LIMIT_EXCEEDED exceptions in Functional tests (total memory limit in Functional tests is ~55.24 GiB). # TSAN will flush shadow memory when reaching this limit. # It may cause false-negatives, but it's better than OOM. -RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment -RUN echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment -RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment -RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment +# max_allocation_size_mb is set to 32GB, so we have much bigger chance to run into memory limit than the limitation of the sanitizers +RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1 max_allocation_size_mb=32768'" >> /etc/environment +RUN echo "UBSAN_OPTIONS='print_stacktrace=1 max_allocation_size_mb=32768'" >> /etc/environment +RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1 max_allocation_size_mb=32768'" >> /etc/environment +RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt max_allocation_size_mb=32768'" >> /etc/environment # Sanitizer options for current shell (not current, but the one that will be spawned on "docker run") # (but w/o verbosity for TSAN, otherwise test.reference will not match) -ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' -ENV UBSAN_OPTIONS='print_stacktrace=1' -ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' +ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1 max_allocation_size_mb=32768' +ENV UBSAN_OPTIONS='print_stacktrace=1 max_allocation_size_mb=32768' +ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1 max_allocation_size_mb=32768' +ENV LSAN_OPTIONS='max_allocation_size_mb=32768' # for external_symbolizer_path RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 970bf12a81a..26283afc86a 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -261,9 +261,12 @@ function timeout_with_logging() { timeout -s TERM --preserve-status "${@}" || exit_code="${?}" + echo "Checking if it is a timeout. The code 124 will indicate a timeout." if [[ "${exit_code}" -eq "124" ]] then - echo "The command 'timeout ${*}' has been killed by timeout" + echo "The command 'timeout ${*}' has been killed by timeout." + else + echo "No, it isn't a timeout." fi return $exit_code diff --git a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml index e2a4976b385..d5b876a4c85 100644 --- a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml +++ b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml @@ -20,7 +20,7 @@ - 10G + 5G diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 6191aeaf304..b8f967ed9c2 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -208,7 +208,6 @@ handle SIGPIPE nostop noprint pass handle SIGTERM nostop noprint pass handle SIGUSR1 nostop noprint pass handle SIGUSR2 nostop noprint pass -handle SIGSEGV nostop pass handle SIG$RTMIN nostop noprint pass info signals continue diff --git a/docker/test/performance-comparison/run.sh b/docker/test/performance-comparison/run.sh index 7afb5da59b1..6ef781fa4c8 100644 --- a/docker/test/performance-comparison/run.sh +++ b/docker/test/performance-comparison/run.sh @@ -13,6 +13,7 @@ entry="/usr/share/clickhouse-test/performance/scripts/entrypoint.sh" # https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt # Double-escaped backslashes are a tribute to the engineering wonder of docker -- # it gives '/bin/sh: 1: [bash,: not found' otherwise. +numactl --hardware node=$(( RANDOM % $(numactl --hardware | sed -n 's/^.*available:\(.*\)nodes.*$/\1/p') )); echo Will bind to NUMA node $node; numactl --cpunodebind=$node --membind=$node $entry diff --git a/docker/test/sqlancer/Dockerfile b/docker/test/sqlancer/Dockerfile index 82fc2598397..3b919ffb3e3 100644 --- a/docker/test/sqlancer/Dockerfile +++ b/docker/test/sqlancer/Dockerfile @@ -6,7 +6,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list RUN apt-get update --yes \ - && env DEBIAN_FRONTEND=noninteractive apt-get install wget git default-jdk maven python3 --yes --no-install-recommends \ + && env DEBIAN_FRONTEND=noninteractive apt-get install wget git python3 default-jdk maven --yes --no-install-recommends \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 857385f4715..8e2f1890f89 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -191,8 +191,8 @@ else ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" - clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" - clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" + clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" + clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC" clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC" else @@ -200,7 +200,7 @@ else clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" fi clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" - clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0" + clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" fi clickhouse-client --query "SHOW TABLES FROM test" @@ -251,9 +251,12 @@ function timeout_with_logging() { timeout -s TERM --preserve-status "${@}" || exit_code="${?}" + echo "Checking if it is a timeout. The code 124 will indicate a timeout." if [[ "${exit_code}" -eq "124" ]] then - echo "The command 'timeout ${*}' has been killed by timeout" + echo "The command 'timeout ${*}' has been killed by timeout." + else + echo "No, it isn't a timeout." fi return $exit_code diff --git a/docker/test/stateless/attach_gdb.lib b/docker/test/stateless/attach_gdb.lib index eb54f920b98..d288288bb17 100644 --- a/docker/test/stateless/attach_gdb.lib +++ b/docker/test/stateless/attach_gdb.lib @@ -20,7 +20,6 @@ handle SIGPIPE nostop noprint pass handle SIGTERM nostop noprint pass handle SIGUSR1 nostop noprint pass handle SIGUSR2 nostop noprint pass -handle SIGSEGV nostop pass handle SIG$RTMIN nostop noprint pass info signals continue diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 670a2420ddf..f9f96c76d59 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -247,12 +247,22 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" + TIMEOUT=$((MAX_RUN_TIME - 800 > 8400 ? 8400 : MAX_RUN_TIME - 800)) + START_TIME=${SECONDS} set +e - timeout -k 60m -s TERM --preserve-status 140m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ - --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ + timeout --preserve-status --signal TERM --kill-after 60m ${TIMEOUT}s \ + clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ + --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt set -e + DURATION=$((START_TIME - SECONDS)) + + echo "Elapsed ${DURATION} seconds." + if [[ $DURATION -ge $TIMEOUT ]] + then + echo "It looks like the command is terminated by the timeout, which is ${TIMEOUT} seconds." + fi } export -f run_tests @@ -264,7 +274,7 @@ if [ "$NUM_TRIES" -gt "1" ]; then # We don't run tests with Ordinary database in PRs, only in master. # So run new/changed tests with Ordinary at least once in flaky check. timeout_with_logging "$TIMEOUT" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ - | sed 's/All tests have finished//' | sed 's/No tests were run//' ||: + | sed 's/All tests have finished/Redacted: a message about tests finish is deleted/' | sed 's/No tests were run/Redacted: a message about no tests run is deleted/' ||: fi timeout_with_logging "$TIMEOUT" bash -c run_tests ||: diff --git a/docker/test/stateless/utils.lib b/docker/test/stateless/utils.lib index c3bb8ae9ea4..cb257536c36 100644 --- a/docker/test/stateless/utils.lib +++ b/docker/test/stateless/utils.lib @@ -45,9 +45,12 @@ function timeout_with_logging() { timeout -s TERM --preserve-status "${@}" || exit_code="${?}" + echo "Checking if it is a timeout. The code 124 will indicate a timeout." if [[ "${exit_code}" -eq "124" ]] then - echo "The command 'timeout ${*}' has been killed by timeout" + echo "The command 'timeout ${*}' has been killed by timeout." + else + echo "No, it isn't a timeout." fi return $exit_code diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 86467394513..b21114e456f 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -209,9 +209,9 @@ clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDat ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'" -clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" -clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" -clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" +clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" +clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" +clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC" clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC" diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index 4442c9d7d9e..3da1a8f3674 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -12,6 +12,7 @@ UNKNOWN_SIGN = "[ UNKNOWN " SKIPPED_SIGN = "[ SKIPPED " HUNG_SIGN = "Found hung queries in processlist" SERVER_DIED_SIGN = "Server died, terminating all processes" +SERVER_DIED_SIGN2 = "Server does not respond to health check" DATABASE_SIGN = "Database: " SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"] @@ -43,7 +44,7 @@ def process_test_log(log_path, broken_tests): if HUNG_SIGN in line: hung = True break - if SERVER_DIED_SIGN in line: + if SERVER_DIED_SIGN in line or SERVER_DIED_SIGN2 in line: server_died = True if RETRIES_SIGN in line: retries = True @@ -111,12 +112,12 @@ def process_test_log(log_path, broken_tests): # Python does not support TSV, so we have to escape '\t' and '\n' manually # and hope that complex escape sequences will not break anything test_results = [ - ( + [ test[0], test[1], test[2], "".join(test[3])[:4096].replace("\t", "\\t").replace("\n", "\\n"), - ) + ] for test in test_results ] @@ -170,18 +171,23 @@ def process_result(result_path, broken_tests): if hung: description = "Some queries hung, " state = "failure" - test_results.append(("Some queries hung", "FAIL", "0", "")) + test_results.append(["Some queries hung", "FAIL", "0", ""]) elif server_died: description = "Server died, " state = "failure" - test_results.append(("Server died", "FAIL", "0", "")) + # When ClickHouse server crashes, some tests are still running + # and fail because they cannot connect to server + for result in test_results: + if result[1] == "FAIL": + result[1] = "SERVER_DIED" + test_results.append(["Server died", "FAIL", "0", ""]) elif not success_finish: description = "Tests are not finished, " state = "failure" - test_results.append(("Tests are not finished", "FAIL", "0", "")) + test_results.append(["Tests are not finished", "FAIL", "0", ""]) elif retries: description = "Some tests restarted, " - test_results.append(("Some tests restarted", "SKIPPED", "0", "")) + test_results.append(["Some tests restarted", "SKIPPED", "0", ""]) else: description = "" @@ -233,11 +239,12 @@ if __name__ == "__main__": # sort by status then by check name order = { "FAIL": 0, - "Timeout": 1, - "NOT_FAILED": 2, - "BROKEN": 3, - "OK": 4, - "SKIPPED": 5, + "SERVER_DIED": 1, + "Timeout": 2, + "NOT_FAILED": 3, + "BROKEN": 4, + "OK": 5, + "SKIPPED": 6, } return order.get(item[1], 10), str(item[0]), item[1] diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 3826e4e9c94..7ffbd9a5bae 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -999,6 +999,10 @@ They can be used for prewhere optimization only if we enable `set allow_statisti [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches which provide an estimation how many distinct values a column contains. +- `count_min` + + [Count-min](https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch) sketches which provide an approximate count of the frequency of each value in a column. + ## Column-level Settings {#column-level-settings} Certain MergeTree settings can be override at column level: diff --git a/docs/en/engines/table-engines/special/keepermap.md b/docs/en/engines/table-engines/special/keepermap.md index 5559cc2c648..04a9a4b0d4e 100644 --- a/docs/en/engines/table-engines/special/keepermap.md +++ b/docs/en/engines/table-engines/special/keepermap.md @@ -54,7 +54,7 @@ CREATE TABLE keeper_map_table `v2` String, `v3` Float32 ) -ENGINE = KeeperMap(/keeper_map_table, 4) +ENGINE = KeeperMap('/keeper_map_table', 4) PRIMARY KEY key ``` diff --git a/docs/en/getting-started/example-datasets/nypd_complaint_data.md b/docs/en/getting-started/example-datasets/nypd_complaint_data.md index a178fe456a6..1caa1ab6a9a 100644 --- a/docs/en/getting-started/example-datasets/nypd_complaint_data.md +++ b/docs/en/getting-started/example-datasets/nypd_complaint_data.md @@ -55,7 +55,7 @@ CMPLNT_FR_TM Nullable(String) ``` :::tip -Most of the time the above command will let you know which fields in the input data are numeric, and which are strings, and which are tuples. This is not always the case. Because ClickHouse is routineley used with datasets containing billions of records there is a default number (100) of rows examined to [infer the schema](/docs/en/integrations/data-ingestion/data-formats/json.md#relying-on-schema-inference) in order to avoid parsing billions of rows to infer the schema. The response below may not match what you see, as the dataset is updated several times each year. Looking at the Data Dictionary you can see that CMPLNT_NUM is specified as text, and not numeric. By overriding the default of 100 rows for inference with the setting `SETTINGS input_format_max_rows_to_read_for_schema_inference=2000` +Most of the time the above command will let you know which fields in the input data are numeric, and which are strings, and which are tuples. This is not always the case. Because ClickHouse is routineley used with datasets containing billions of records there is a default number (100) of rows examined to [infer the schema](/en/integrations/data-formats/json/inference) in order to avoid parsing billions of rows to infer the schema. The response below may not match what you see, as the dataset is updated several times each year. Looking at the Data Dictionary you can see that CMPLNT_NUM is specified as text, and not numeric. By overriding the default of 100 rows for inference with the setting `SETTINGS input_format_max_rows_to_read_for_schema_inference=2000` you can get a better idea of the content. Note: as of version 22.5 the default is now 25,000 rows for inferring the schema, so only change the setting if you are on an older version or if you need more than 25,000 rows to be sampled. diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 9c4c082bc3a..8892c6d8d3f 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -32,6 +32,7 @@ The supported formats are: | [Vertical](#vertical) | ✗ | ✔ | | [JSON](#json) | ✔ | ✔ | | [JSONAsString](#jsonasstring) | ✔ | ✗ | +| [JSONAsObject](#jsonasobject) | ✔ | ✗ | | [JSONStrings](#jsonstrings) | ✔ | ✔ | | [JSONColumns](#jsoncolumns) | ✔ | ✔ | | [JSONColumnsWithMetadata](#jsoncolumnsmonoblock) | ✔ | ✔ | @@ -822,6 +823,67 @@ Result: └────────────────────────────┘ ``` +## JSONAsObject {#jsonasobject} + +In this format, a single JSON object is interpreted as a single [Object('json')](/docs/en/sql-reference/data-types/json.md) value. If the input has several JSON objects (comma separated), they are interpreted as separate rows. If the input data is enclosed in square brackets, it is interpreted as an array of JSONs. + +This format can only be parsed for a table with a single field of type [Object('json')](/docs/en/sql-reference/data-types/json.md). The remaining columns must be set to [DEFAULT](/docs/en/sql-reference/statements/create/table.md/#default) or [MATERIALIZED](/docs/en/sql-reference/statements/create/table.md/#materialized). + +**Examples** + +Query: + +``` sql +SET allow_experimental_object_type = 1; +CREATE TABLE json_as_object (json Object('json')) ENGINE = Memory; +INSERT INTO json_as_object (json) FORMAT JSONAsObject {"foo":{"bar":{"x":"y"},"baz":1}},{},{"any json stucture":1} +SELECT * FROM json_as_object FORMAT JSONEachRow; +``` + +Result: + +``` response +{"json":{"any json stucture":0,"foo":{"bar":{"x":"y"},"baz":1}}} +{"json":{"any json stucture":0,"foo":{"bar":{"x":""},"baz":0}}} +{"json":{"any json stucture":1,"foo":{"bar":{"x":""},"baz":0}}} +``` + +**An array of JSON objects** + +Query: + +``` sql +SET allow_experimental_object_type = 1; +CREATE TABLE json_square_brackets (field Object('json')) ENGINE = Memory; +INSERT INTO json_square_brackets FORMAT JSONAsObject [{"id": 1, "name": "name1"}, {"id": 2, "name": "name2"}]; + +SELECT * FROM json_square_brackets FORMAT JSONEachRow; +``` + +Result: + +```response +{"field":{"id":1,"name":"name1"}} +{"field":{"id":2,"name":"name2"}} +``` + +**Columns with default values** + +```sql +SET allow_experimental_object_type = 1; +CREATE TABLE json_as_object (json Object('json'), time DateTime MATERIALIZED now()) ENGINE = Memory; +INSERT INTO json_as_object (json) FORMAT JSONAsObject {"foo":{"bar":{"x":"y"},"baz":1}}; +INSERT INTO json_as_object (json) FORMAT JSONAsObject {}; +INSERT INTO json_as_object (json) FORMAT JSONAsObject {"any json stucture":1} +SELECT * FROM json_as_object FORMAT JSONEachRow +``` + +```resonse +{"json":{"any json stucture":0,"foo":{"bar":{"x":"y"},"baz":1}},"time":"2024-07-25 17:02:45"} +{"json":{"any json stucture":0,"foo":{"bar":{"x":""},"baz":0}},"time":"2024-07-25 17:02:47"} +{"json":{"any json stucture":1,"foo":{"bar":{"x":""},"baz":0}},"time":"2024-07-25 17:02:50"} +``` + ## JSONCompact {#jsoncompact} Differs from JSON only in that data rows are output in arrays, not in objects. diff --git a/docs/en/operations/settings/settings-users.md b/docs/en/operations/settings/settings-users.md index 96477f777a9..ef1e58fd18e 100644 --- a/docs/en/operations/settings/settings-users.md +++ b/docs/en/operations/settings/settings-users.md @@ -22,6 +22,21 @@ Structure of the `users` section: + + + ssh-ed25519 + AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj + + + ecdsa-sha2-nistp256 + AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBNxeV2uN5UY6CUbCzTA1rXfYimKQA5ivNIqxdax4bcMXz4D0nSk2l5E1TkR5mG8EBWtmExSPbcEPJ8V7lyWWbA8= + + + ssh-rsa + AAAAB3NzaC1yc2EAAAADAQABAAABgQCpgqL1SHhPVBOTFlOm0pu+cYBbADzC2jL41sPMawYCJHDyHuq7t+htaVVh2fRgpAPmSEnLEC2d4BEIKMtPK3bfR8plJqVXlLt6Q8t4b1oUlnjb3VPA9P6iGcW7CV1FBkZQEVx8ckOfJ3F+kI5VsrRlEDgiecm/C1VPl0/9M2llW/mPUMaD65cM9nlZgM/hUeBrfxOEqM11gDYxEZm1aRSbZoY4dfdm3vzvpSQ6lrCrkjn3X2aSmaCLcOWJhfBWMovNDB8uiPuw54g3ioZ++qEQMlfxVsqXDGYhXCrsArOVuW/5RbReO79BvXqdssiYShfwo+GhQ0+aLWMIW/jgBkkqx/n7uKLzCMX7b2F+aebRYFh+/QXEj7SnihdVfr9ud6NN3MWzZ1ltfIczlEcFLrLJ1Yq57wW6wXtviWh59WvTWFiPejGjeSjjJyqqB49tKdFVFuBnIU5u/bch2DXVgiAEdQwUrIp1ACoYPq22HFFAYUJrL32y7RxX3PGzuAv3LOc= + + + 0|1 @@ -79,6 +94,24 @@ Password can be specified in plaintext or in SHA256 (hex format). The first line of the result is the password. The second line is the corresponding double SHA1 hash. +### username/ssh-key {#user-sshkey} + +This setting allows authenticating with SSH keys. + +Given a SSH key (as generated by `ssh-keygen`) like +``` +ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj john@example.com +``` +The `ssh_key` element is expected to be +``` + + ssh-ed25519 + AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj + +``` + +Substitute `ssh-ed25519` with `ssh-rsa` or `ecdsa-sha2-nistp256` for the other supported algorithms. + ### access_management {#access_management-user-setting} This setting enables or disables using of SQL-driven [access control and account management](../../guides/sre/user-management/index.md#access-control) for the user. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index c3f697c3bdc..8739414464e 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -5608,3 +5608,15 @@ Default value: `10000000`. Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached. Default value: `1GiB`. + +## restore_replace_external_engines_to_null + +For testing purposes. Replaces all external engines to Null to not initiate external connections. + +Default value: `False` + +## restore_replace_external_table_functions_to_null + +For testing purposes. Replaces all external table functions to Null to not initiate external connections. + +Default value: `False` \ No newline at end of file diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/json.md index f2eac12594d..f218c8d0339 100644 --- a/docs/en/sql-reference/data-types/json.md +++ b/docs/en/sql-reference/data-types/json.md @@ -7,7 +7,7 @@ keywords: [object, data type] # Object Data Type (deprecated) -**This feature is not production-ready and is now deprecated.** If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864). +**This feature is not production-ready and is now deprecated.** If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-formats/json/overview) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864).
diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index b7e4094f30e..012b0db96b5 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2102,14 +2102,14 @@ Result: └─────────────────┘ ``` -## filesystemFree +## filesystemUnreserved -Returns the total amount of the free space on the filesystem hosting the database persistence. See also `filesystemAvailable` +Returns the total amount of the free space on the filesystem hosting the database persistence. (previously `filesystemFree`). See also [`filesystemAvailable`](#filesystemavailable). **Syntax** ```sql -filesystemFree() +filesystemUnreserved() ``` **Returned value** @@ -2121,7 +2121,7 @@ filesystemFree() Query: ```sql -SELECT formatReadableSize(filesystemFree()) AS "Free space"; +SELECT formatReadableSize(filesystemUnreserved()) AS "Free space"; ``` Result: @@ -2449,11 +2449,11 @@ As you can see, `runningAccumulate` merges states for each group of rows separat ## joinGet -The function lets you extract data from the table the same way as from a [dictionary](../../sql-reference/dictionaries/index.md). - -Gets the data from [Join](../../engines/table-engines/special/join.md#creating-a-table) tables using the specified join key. +The function lets you extract data from the table the same way as from a [dictionary](../../sql-reference/dictionaries/index.md). Gets the data from [Join](../../engines/table-engines/special/join.md#creating-a-table) tables using the specified join key. +:::note Only supports tables created with the `ENGINE = Join(ANY, LEFT, )` statement. +::: **Syntax** @@ -2463,26 +2463,32 @@ joinGet(join_storage_table_name, `value_column`, join_keys) **Arguments** -- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicating where the search is performed. The identifier is searched in the default database (see setting `default_database` in the config file). To override the default database, use `USE db_name` or specify the database and the table through the separator `db_name.db_table` as in the example. +- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicating where the search is performed. - `value_column` — name of the column of the table that contains required data. - `join_keys` — list of keys. +:::note +The identifier is searched for in the default database (see setting `default_database` in the config file). To override the default database, use `USE db_name` or specify the database and the table through the separator `db_name.db_table` as in the example. +::: + **Returned value** -Returns a list of values corresponded to list of keys. - -If certain does not exist in source table then `0` or `null` will be returned based on [join_use_nulls](../../operations/settings/settings.md#join_use_nulls) setting. +- Returns a list of values corresponded to the list of keys. +:::note +If a certain key does not exist in source table then `0` or `null` will be returned based on [join_use_nulls](../../operations/settings/settings.md#join_use_nulls) setting during table creation. More info about `join_use_nulls` in [Join operation](../../engines/table-engines/special/join.md). +::: **Example** Input table: ```sql -CREATE DATABASE db_test -CREATE TABLE db_test.id_val(`id` UInt32, `val` UInt32) ENGINE = Join(ANY, LEFT, id) SETTINGS join_use_nulls = 1 -INSERT INTO db_test.id_val VALUES (1,11)(2,12)(4,13) +CREATE DATABASE db_test; +CREATE TABLE db_test.id_val(`id` UInt32, `val` UInt32) ENGINE = Join(ANY, LEFT, id); +INSERT INTO db_test.id_val VALUES (1, 11)(2, 12)(4, 13); +SELECT * FROM db_test.id_val; ``` ```text @@ -2496,18 +2502,116 @@ INSERT INTO db_test.id_val VALUES (1,11)(2,12)(4,13) Query: ```sql -SELECT joinGet(db_test.id_val, 'val', toUInt32(number)) from numbers(4) SETTINGS join_use_nulls = 1 +SELECT number, joinGet(db_test.id_val, 'val', toUInt32(number)) from numbers(4); ``` Result: ```text -┌─joinGet(db_test.id_val, 'val', toUInt32(number))─┐ -│ 0 │ -│ 11 │ -│ 12 │ -│ 0 │ -└──────────────────────────────────────────────────┘ + ┌─number─┬─joinGet('db_test.id_val', 'val', toUInt32(number))─┐ +1. │ 0 │ 0 │ +2. │ 1 │ 11 │ +3. │ 2 │ 12 │ +4. │ 3 │ 0 │ + └────────┴────────────────────────────────────────────────────┘ +``` + +Setting `join_use_nulls` can be used during table creation to change the behaviour of what gets returned if no key exists in the source table. + +```sql +CREATE DATABASE db_test; +CREATE TABLE db_test.id_val_nulls(`id` UInt32, `val` UInt32) ENGINE = Join(ANY, LEFT, id) SETTINGS join_use_nulls=1; +INSERT INTO db_test.id_val_nulls VALUES (1, 11)(2, 12)(4, 13); +SELECT * FROM db_test.id_val_nulls; +``` + +```text +┌─id─┬─val─┐ +│ 4 │ 13 │ +│ 2 │ 12 │ +│ 1 │ 11 │ +└────┴─────┘ +``` + +Query: + +```sql +SELECT number, joinGet(db_test.id_val_nulls, 'val', toUInt32(number)) from numbers(4); +``` + +Result: + +```text + ┌─number─┬─joinGet('db_test.id_val_nulls', 'val', toUInt32(number))─┐ +1. │ 0 │ ᴺᵁᴸᴸ │ +2. │ 1 │ 11 │ +3. │ 2 │ 12 │ +4. │ 3 │ ᴺᵁᴸᴸ │ + └────────┴──────────────────────────────────────────────────────────┘ +``` + +## joinGetOrNull + +Like [joinGet](#joinget) but returns `NULL` when the key is missing instead of returning the default value. + +**Syntax** + +```sql +joinGetOrNull(join_storage_table_name, `value_column`, join_keys) +``` + +**Arguments** + +- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicating where the search is performed. +- `value_column` — name of the column of the table that contains required data. +- `join_keys` — list of keys. + +:::note +The identifier is searched for in the default database (see setting `default_database` in the config file). To override the default database, use `USE db_name` or specify the database and the table through the separator `db_name.db_table` as in the example. +::: + +**Returned value** + +- Returns a list of values corresponded to the list of keys. + +:::note +If a certain key does not exist in source table then `NULL` is returned for that key. +::: + +**Example** + +Input table: + +```sql +CREATE DATABASE db_test; +CREATE TABLE db_test.id_val(`id` UInt32, `val` UInt32) ENGINE = Join(ANY, LEFT, id); +INSERT INTO db_test.id_val VALUES (1, 11)(2, 12)(4, 13); +SELECT * FROM db_test.id_val; +``` + +```text +┌─id─┬─val─┐ +│ 4 │ 13 │ +│ 2 │ 12 │ +│ 1 │ 11 │ +└────┴─────┘ +``` + +Query: + +```sql +SELECT number, joinGetOrNull(db_test.id_val, 'val', toUInt32(number)) from numbers(4); +``` + +Result: + +```text + ┌─number─┬─joinGetOrNull('db_test.id_val', 'val', toUInt32(number))─┐ +1. │ 0 │ ᴺᵁᴸᴸ │ +2. │ 1 │ 11 │ +3. │ 2 │ 12 │ +4. │ 3 │ ᴺᵁᴸᴸ │ + └────────┴──────────────────────────────────────────────────────────┘ ``` ## catboostEvaluate diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 61e84ca72d1..66126e649c6 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -49,149 +49,3001 @@ SETTINGS cast_keep_nullable = 1 └──────────────────┴─────────────────────┴──────────────────┘ ``` -## toInt(8\|16\|32\|64\|128\|256) +## toInt8 -Converts an input value to a value the [Int](../data-types/int-uint.md) data type. This function family includes: +Converts an input value to a value of type [`Int8`](../data-types/int-uint.md). Throws an exception in case of an error. -- `toInt8(expr)` — Converts to a value of data type `Int8`. -- `toInt16(expr)` — Converts to a value of data type `Int16`. -- `toInt32(expr)` — Converts to a value of data type `Int32`. -- `toInt64(expr)` — Converts to a value of data type `Int64`. -- `toInt128(expr)` — Converts to a value of data type `Int128`. -- `toInt256(expr)` — Converts to a value of data type `Int256`. +**Syntax** + +```sql +toInt8(expr) +``` **Arguments** -- `expr` — [Expression](../syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt8('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [Int8](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +For example: `SELECT toInt8(128) == -128;`. +::: **Returned value** -Integer value in the `Int8`, `Int16`, `Int32`, `Int64`, `Int128` or `Int256` data type. +- 8-bit integer value. [Int8](../data-types/int-uint.md). -Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. - -The behavior of functions for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#common-issues-with-data-conversion), when using the functions. +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: **Example** Query: -``` sql -SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8); +```sql +SELECT + toInt8(-8), + toInt8(-8.8), + toInt8('-8') +FORMAT vertical; ``` Result: ```response -┌─────────toInt64(nan)─┬─toInt32(32)─┬─toInt16('16')─┬─toInt8(8.8)─┐ -│ -9223372036854775808 │ 32 │ 16 │ 8 │ -└──────────────────────┴─────────────┴───────────────┴─────────────┘ +Row 1: +────── +toInt8(-8): -8 +toInt8(-8.8): -8 +toInt8('-8'): -8 ``` -## toInt(8\|16\|32\|64\|128\|256)OrZero +**See also** -Takes an argument of type [String](../data-types/string.md) and tries to parse it into an Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `0`. +- [`toInt8OrZero`](#toint8orzero). +- [`toInt8OrNull`](#toint8ornull). +- [`toInt8OrDefault`](#toint8ordefault). -**Example** +## toInt8OrZero -Query: +Like [`toInt8`](#toint8), this function converts an input value to a value of type [Int8](../data-types/int-uint.md) but returns `0` in case of an error. -``` sql -SELECT toInt64OrZero('123123'), toInt8OrZero('123qwe123'); +**Syntax** + +```sql +toInt8OrZero(x) ``` -Result: - -```response -┌─toInt64OrZero('123123')─┬─toInt8OrZero('123qwe123')─┐ -│ 123123 │ 0 │ -└─────────────────────────┴───────────────────────────┘ -``` - -## toInt(8\|16\|32\|64\|128\|256)OrNull - -It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `NULL`. - -**Example** - -Query: - -``` sql -SELECT toInt64OrNull('123123'), toInt8OrNull('123qwe123'); -``` - -Result: - -```response -┌─toInt64OrNull('123123')─┬─toInt8OrNull('123qwe123')─┐ -│ 123123 │ ᴺᵁᴸᴸ │ -└─────────────────────────┴───────────────────────────┘ -``` - -## toInt(8\|16\|32\|64\|128\|256)OrDefault - -It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns the default type value. - -**Example** - -Query: - -``` sql -SELECT toInt64OrDefault('123123', cast('-1' as Int64)), toInt8OrDefault('123qwe123', cast('-1' as Int8)); -``` - -Result: - -```response -┌─toInt64OrDefault('123123', CAST('-1', 'Int64'))─┬─toInt8OrDefault('123qwe123', CAST('-1', 'Int8'))─┐ -│ 123123 │ -1 │ -└─────────────────────────────────────────────────┴──────────────────────────────────────────────────┘ -``` - - -## toUInt(8\|16\|32\|64\|256) - -Converts an input value to the [UInt](../data-types/int-uint.md) data type. This function family includes: - -- `toUInt8(expr)` — Converts to a value of data type `UInt8`. -- `toUInt16(expr)` — Converts to a value of data type `UInt16`. -- `toUInt32(expr)` — Converts to a value of data type `UInt32`. -- `toUInt64(expr)` — Converts to a value of data type `UInt64`. -- `toUInt256(expr)` — Converts to a value of data type `UInt256`. - **Arguments** -- `expr` — [Expression](../syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +- `x` — A String representation of a number. [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `0`): +- String representations of ordinary Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt8OrZero('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [Int8](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: **Returned value** -- Integer value in the `UInt8`, `UInt16`, `UInt32`, `UInt64` or `UInt256` data type. +- 8-bit integer value if successful, otherwise `0`. [Int8](../data-types/int-uint.md). -Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. - -The behavior of functions for negative arguments and for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#common-issues-with-data-conversion), when using the functions. +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: **Example** Query: ``` sql -SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8); +SELECT + toInt8OrZero('-8'), + toInt8OrZero('abc') +FORMAT vertical; ``` Result: ```response -┌───────toUInt64(nan)─┬─toUInt32(-32)─┬─toUInt16('16')─┬─toUInt8(8.8)─┐ -│ 9223372036854775808 │ 4294967264 │ 16 │ 8 │ -└─────────────────────┴───────────────┴────────────────┴──────────────┘ +Row 1: +────── +toInt8OrZero('-8'): -8 +toInt8OrZero('abc'): 0 ``` -## toUInt(8\|16\|32\|64\|256)OrZero +**See also** -## toUInt(8\|16\|32\|64\|256)OrNull +- [`toInt8`](#toint8). +- [`toInt8OrNull`](#toint8ornull). +- [`toInt8OrDefault`](#toint8ordefault). -## toUInt(8\|16\|32\|64\|256)OrDefault +## toInt8OrNull + +Like [`toInt8`](#toint8), this function converts an input value to a value of type [Int8](../data-types/int-uint.md) but returns `NULL` in case of an error. + +**Syntax** + +```sql +toInt8OrNull(x) +``` + +**Arguments** + +- `x` — A String representation of a number. [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `\N`) +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt8OrNull('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [Int8](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 8-bit integer value if successful, otherwise `NULL`. [Int8](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt8OrNull('-8'), + toInt8OrNull('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt8OrNull('-8'): -8 +toInt8OrNull('abc'): ᴺᵁᴸᴸ +``` + +**See also** + +- [`toInt8`](#toint8). +- [`toInt8OrZero`](#toint8orzero). +- [`toInt8OrDefault`](#toint8ordefault). + +## toInt8OrDefault + +Like [`toInt8`](#toint8), this function converts an input value to a value of type [Int8](../data-types/int-uint.md) but returns the default value in case of an error. +If no `default` value is passed then `0` is returned in case of an error. + +**Syntax** + +```sql +toInt8OrDefault(expr[, default]) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `default` (optional) — The default value to return if parsing to type `Int8` is unsuccessful. [Int8](../data-types/int-uint.md). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Arguments for which the default value is returned: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt8OrDefault('0xc0fe', CAST('-1', 'Int8'));`. + +:::note +If the input value cannot be represented within the bounds of [Int8](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 8-bit integer value if successful, otherwise returns the default value if passed or `0` if not. [Int8](../data-types/int-uint.md). + +:::note +- The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The default value type should be the same as the cast type. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt8OrDefault('-8', CAST('-1', 'Int8')), + toInt8OrDefault('abc', CAST('-1', 'Int8')) +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt8OrDefault('-8', CAST('-1', 'Int8')): -8 +toInt8OrDefault('abc', CAST('-1', 'Int8')): -1 +``` + +**See also** + +- [`toInt8`](#toint8). +- [`toInt8OrZero`](#toint8orzero). +- [`toInt8OrNull`](#toint8orNull). + +## toInt16 + +Converts an input value to a value of type [`Int16`](../data-types/int-uint.md). Throws an exception in case of an error. + +**Syntax** + +```sql +toInt16(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt16('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [Int16](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +For example: `SELECT toInt16(32768) == -32768;`. +::: + +**Returned value** + +- 16-bit integer value. [Int16](../data-types/int-uint.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +```sql +SELECT + toInt16(-16), + toInt16(-16.16), + toInt16('-16') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt16(-16): -16 +toInt16(-16.16): -16 +toInt16('-16'): -16 +``` + +**See also** + +- [`toInt16OrZero`](#toint16orzero). +- [`toInt16OrNull`](#toint16ornull). +- [`toInt16OrDefault`](#toint16ordefault). + +## toInt16OrZero + +Like [`toInt16`](#toint16), this function converts an input value to a value of type [Int16](../data-types/int-uint.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toInt16OrZero(x) +``` + +**Arguments** + +- `x` — A String representation of a number. [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `0`): +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt16OrZero('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [Int16](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered as an error. +::: + +**Returned value** + +- 16-bit integer value if successful, otherwise `0`. [Int16](../data-types/int-uint.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt16OrZero('-16'), + toInt16OrZero('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt16OrZero('-16'): -16 +toInt16OrZero('abc'): 0 +``` + +**See also** + +- [`toInt16`](#toint16). +- [`toInt16OrNull`](#toint16ornull). +- [`toInt16OrDefault`](#toint16ordefault). + +## toInt16OrNull + +Like [`toInt16`](#toint16), this function converts an input value to a value of type [Int16](../data-types/int-uint.md) but returns `NULL` in case of an error. + +**Syntax** + +```sql +toInt16OrNull(x) +``` + +**Arguments** + +- `x` — A String representation of a number. [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `\N`) +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt16OrNull('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [Int16](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 16-bit integer value if successful, otherwise `NULL`. [Int16](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt16OrNull('-16'), + toInt16OrNull('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt16OrNull('-16'): -16 +toInt16OrNull('abc'): ᴺᵁᴸᴸ +``` + +**See also** + +- [`toInt16`](#toint16). +- [`toInt16OrZero`](#toint16orzero). +- [`toInt16OrDefault`](#toint16ordefault). + +## toInt16OrDefault + +Like [`toInt16`](#toint16), this function converts an input value to a value of type [Int16](../data-types/int-uint.md) but returns the default value in case of an error. +If no `default` value is passed then `0` is returned in case of an error. + +**Syntax** + +```sql +toInt16OrDefault(expr[, default]) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `default` (optional) — The default value to return if parsing to type `Int16` is unsuccessful. [Int16](../data-types/int-uint.md). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Arguments for which the default value is returned: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt16OrDefault('0xc0fe', CAST('-1', 'Int16'));`. + +:::note +If the input value cannot be represented within the bounds of [Int16](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 16-bit integer value if successful, otherwise returns the default value if passed or `0` if not. [Int16](../data-types/int-uint.md). + +:::note +- The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The default value type should be the same as the cast type. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt16OrDefault('-16', CAST('-1', 'Int16')), + toInt16OrDefault('abc', CAST('-1', 'Int16')) +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt16OrDefault('-16', CAST('-1', 'Int16')): -16 +toInt16OrDefault('abc', CAST('-1', 'Int16')): -1 +``` + +**See also** + +- [`toInt16`](#toint16). +- [`toInt16OrZero`](#toint16orzero). +- [`toInt16OrNull`](#toint16ornull). + +## toInt32 + +Converts an input value to a value of type [`Int32`](../data-types/int-uint.md). Throws an exception in case of an error. + +**Syntax** + +```sql +toInt32(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt32('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [Int32](../data-types/int-uint.md), the result over or under flows. +This is not considered an error. +For example: `SELECT toInt32(2147483648) == -2147483648;` +::: + +**Returned value** + +- 32-bit integer value. [Int32](../data-types/int-uint.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +```sql +SELECT + toInt32(-32), + toInt32(-32.32), + toInt32('-32') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt32(-32): -32 +toInt32(-32.32): -32 +toInt32('-32'): -32 +``` + +**See also** + +- [`toInt32OrZero`](#toint32orzero). +- [`toInt32OrNull`](#toint32ornull). +- [`toInt32OrDefault`](#toint32ordefault). + +## toInt32OrZero + +Like [`toInt32`](#toint32), this function converts an input value to a value of type [Int32](../data-types/int-uint.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toInt32OrZero(x) +``` + +**Arguments** + +- `x` — A String representation of a number. [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `0`): +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt32OrZero('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [Int32](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 32-bit integer value if successful, otherwise `0`. [Int32](../data-types/int-uint.md) + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncate fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt32OrZero('-32'), + toInt32OrZero('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt32OrZero('-32'): -32 +toInt32OrZero('abc'): 0 +``` +**See also** + +- [`toInt32`](#toint32). +- [`toInt32OrNull`](#toint32ornull). +- [`toInt32OrDefault`](#toint32ordefault). + +## toInt32OrNull + +Like [`toInt32`](#toint32), this function converts an input value to a value of type [Int32](../data-types/int-uint.md) but returns `NULL` in case of an error. + +**Syntax** + +```sql +toInt32OrNull(x) +``` + +**Arguments** + +- `x` — A String representation of a number. [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `\N`) +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt32OrNull('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [Int32](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 32-bit integer value if successful, otherwise `NULL`. [Int32](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt32OrNull('-32'), + toInt32OrNull('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt32OrNull('-32'): -32 +toInt32OrNull('abc'): ᴺᵁᴸᴸ +``` + +**See also** + +- [`toInt32`](#toint32). +- [`toInt32OrZero`](#toint32orzero). +- [`toInt32OrDefault`](#toint32ordefault). + +## toInt32OrDefault + +Like [`toInt32`](#toint32), this function converts an input value to a value of type [Int32](../data-types/int-uint.md) but returns the default value in case of an error. +If no `default` value is passed then `0` is returned in case of an error. + +**Syntax** + +```sql +toInt32OrDefault(expr[, default]) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `default` (optional) — The default value to return if parsing to type `Int32` is unsuccessful. [Int32](../data-types/int-uint.md). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Arguments for which the default value is returned: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt32OrDefault('0xc0fe', CAST('-1', 'Int32'));`. + +:::note +If the input value cannot be represented within the bounds of [Int32](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 32-bit integer value if successful, otherwise returns the default value if passed or `0` if not. [Int32](../data-types/int-uint.md). + +:::note +- The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The default value type should be the same as the cast type. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt32OrDefault('-32', CAST('-1', 'Int32')), + toInt32OrDefault('abc', CAST('-1', 'Int32')) +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt32OrDefault('-32', CAST('-1', 'Int32')): -32 +toInt32OrDefault('abc', CAST('-1', 'Int32')): -1 +``` + +**See also** + +- [`toInt32`](#toint32). +- [`toInt32OrZero`](#toint32orzero). +- [`toInt32OrNull`](#toint32ornull). + +## toInt64 + +Converts an input value to a value of type [`Int64`](../data-types/int-uint.md). Throws an exception in case of an error. + +**Syntax** + +```sql +toInt64(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Unsupported types: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt64('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [Int64](../data-types/int-uint.md), the result over or under flows. +This is not considered an error. +For example: `SELECT toInt64(9223372036854775808) == -9223372036854775808;` +::: + +**Returned value** + +- 64-bit integer value. [Int64](../data-types/int-uint.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +```sql +SELECT + toInt64(-64), + toInt64(-64.64), + toInt64('-64') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt64(-64): -64 +toInt64(-64.64): -64 +toInt64('-64'): -64 +``` + +**See also** + +- [`toInt64OrZero`](#toint64orzero). +- [`toInt64OrNull`](#toint64ornull). +- [`toInt64OrDefault`](#toint64ordefault). + +## toInt64OrZero + +Like [`toInt64`](#toint64), this function converts an input value to a value of type [Int64](../data-types/int-uint.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toInt64OrZero(x) +``` + +**Arguments** + +- `x` — A String representation of a number. [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `0`): +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt64OrZero('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [Int64](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 64-bit integer value if successful, otherwise `0`. [Int64](../data-types/int-uint.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt64OrZero('-64'), + toInt64OrZero('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt64OrZero('-64'): -64 +toInt64OrZero('abc'): 0 +``` + +**See also** + +- [`toInt64`](#toint64). +- [`toInt64OrNull`](#toint64ornull). +- [`toInt64OrDefault`](#toint64ordefault). + +## toInt64OrNull + +Like [`toInt64`](#toint64), this function converts an input value to a value of type [Int64](../data-types/int-uint.md) but returns `NULL` in case of an error. + +**Syntax** + +```sql +toInt64OrNull(x) +``` + +**Arguments** + +- `x` — A String representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `\N`) +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt64OrNull('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [Int64](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 64-bit integer value if successful, otherwise `NULL`. [Int64](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt64OrNull('-64'), + toInt64OrNull('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt64OrNull('-64'): -64 +toInt64OrNull('abc'): ᴺᵁᴸᴸ +``` + +**See also** + +- [`toInt64`](#toint64). +- [`toInt64OrZero`](#toint64orzero). +- [`toInt64OrDefault`](#toint64ordefault). + +## toInt64OrDefault + +Like [`toInt64`](#toint64), this function converts an input value to a value of type [Int64](../data-types/int-uint.md) but returns the default value in case of an error. +If no `default` value is passed then `0` is returned in case of an error. + +**Syntax** + +```sql +toInt64OrDefault(expr[, default]) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `default` (optional) — The default value to return if parsing to type `Int64` is unsuccessful. [Int64](../data-types/int-uint.md). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Arguments for which the default value is returned: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt64OrDefault('0xc0fe', CAST('-1', 'Int64'));`. + +:::note +If the input value cannot be represented within the bounds of [Int64](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 64-bit integer value if successful, otherwise returns the default value if passed or `0` if not. [Int64](../data-types/int-uint.md). + +:::note +- The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The default value type should be the same as the cast type. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt64OrDefault('-64', CAST('-1', 'Int64')), + toInt64OrDefault('abc', CAST('-1', 'Int64')) +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt64OrDefault('-64', CAST('-1', 'Int64')): -64 +toInt64OrDefault('abc', CAST('-1', 'Int64')): -1 +``` + +**See also** + +- [`toInt64`](#toint64). +- [`toInt64OrZero`](#toint64orzero). +- [`toInt64OrNull`](#toint64ornull). + +## toInt128 + +Converts an input value to a value of type [`Int128`](../data-types/int-uint.md). Throws an exception in case of an error. + +**Syntax** + +```sql +toInt128(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt128('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [Int128](../data-types/int-uint.md), the result over or under flows. +This is not considered an error. +::: + +**Returned value** + +- 128-bit integer value. [Int128](../data-types/int-uint.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +```sql +SELECT + toInt128(-128), + toInt128(-128.8), + toInt128('-128') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt128(-128): -128 +toInt128(-128.8): -128 +toInt128('-128'): -128 +``` + +**See also** + +- [`toInt128OrZero`](#toint128orzero). +- [`toInt128OrNull`](#toint128ornull). +- [`toInt128OrDefault`](#toint128ordefault). + +## toInt128OrZero + +Like [`toInt128`](#toint128), this function converts an input value to a value of type [Int128](../data-types/int-uint.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toInt128OrZero(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `0`): +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt128OrZero('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [Int128](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 128-bit integer value if successful, otherwise `0`. [Int128](../data-types/int-uint.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt128OrZero('-128'), + toInt128OrZero('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt128OrZero('-128'): -128 +toInt128OrZero('abc'): 0 +``` + +**See also** + +- [`toInt128`](#toint128). +- [`toInt128OrNull`](#toint128ornull). +- [`toInt128OrDefault`](#toint128ordefault). + +## toInt128OrNull + +Like [`toInt128`](#toint128), this function converts an input value to a value of type [Int128](../data-types/int-uint.md) but returns `NULL` in case of an error. + +**Syntax** + +```sql +toInt128OrNull(x) +``` + +**Arguments** + +- `x` — A String representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `\N`) +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt128OrNull('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [Int128](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 128-bit integer value if successful, otherwise `NULL`. [Int128](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt128OrNull('-128'), + toInt128OrNull('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt128OrNull('-128'): -128 +toInt128OrNull('abc'): ᴺᵁᴸᴸ +``` + +**See also** + +- [`toInt128`](#toint128). +- [`toInt128OrZero`](#toint128orzero). +- [`toInt128OrDefault`](#toint128ordefault). + +## toInt128OrDefault + +Like [`toInt128`](#toint128), this function converts an input value to a value of type [Int128](../data-types/int-uint.md) but returns the default value in case of an error. +If no `default` value is passed then `0` is returned in case of an error. + +**Syntax** + +```sql +toInt128OrDefault(expr[, default]) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `default` (optional) — The default value to return if parsing to type `Int128` is unsuccessful. [Int128](../data-types/int-uint.md). + +Supported arguments: +- (U)Int8/16/32/64/128/256. +- Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Arguments for which the default value is returned: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt128OrDefault('0xc0fe', CAST('-1', 'Int128'));`. + +:::note +If the input value cannot be represented within the bounds of [Int128](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 128-bit integer value if successful, otherwise returns the default value if passed or `0` if not. [Int128](../data-types/int-uint.md). + +:::note +- The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The default value type should be the same as the cast type. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt128OrDefault('-128', CAST('-1', 'Int128')), + toInt128OrDefault('abc', CAST('-1', 'Int128')) +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt128OrDefault('-128', CAST('-1', 'Int128')): -128 +toInt128OrDefault('abc', CAST('-1', 'Int128')): -1 +``` + +**See also** + +- [`toInt128`](#toint128). +- [`toInt128OrZero`](#toint128orzero). +- [`toInt128OrNull`](#toint128ornull). + +## toInt256 + +Converts an input value to a value of type [`Int256`](../data-types/int-uint.md). Throws an exception in case of an error. + +**Syntax** + +```sql +toInt256(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt256('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [Int256](../data-types/int-uint.md), the result over or under flows. +This is not considered an error. +::: + +**Returned value** + +- 256-bit integer value. [Int256](../data-types/int-uint.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +```sql +SELECT + toInt256(-256), + toInt256(-256.256), + toInt256('-256') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt256(-256): -256 +toInt256(-256.256): -256 +toInt256('-256'): -256 +``` + +**See also** + +- [`toInt256OrZero`](#toint256orzero). +- [`toInt256OrNull`](#toint256ornull). +- [`toInt256OrDefault`](#toint256ordefault). + +## toInt256OrZero + +Like [`toInt256`](#toint256), this function converts an input value to a value of type [Int256](../data-types/int-uint.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toInt256OrZero(x) +``` + +**Arguments** + +- `x` — A String representation of a number. [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `0`): +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt256OrZero('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [Int256](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 256-bit integer value if successful, otherwise `0`. [Int256](../data-types/int-uint.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt256OrZero('-256'), + toInt256OrZero('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt256OrZero('-256'): -256 +toInt256OrZero('abc'): 0 +``` + +**See also** + +- [`toInt256`](#toint256). +- [`toInt256OrNull`](#toint256ornull). +- [`toInt256OrDefault`](#toint256ordefault). + +## toInt256OrNull + +Like [`toInt256`](#toint256), this function converts an input value to a value of type [Int256](../data-types/int-uint.md) but returns `NULL` in case of an error. + +**Syntax** + +```sql +toInt256OrNull(x) +``` + +**Arguments** + +- `x` — A String representation of a number. [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `\N`) +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toInt256OrNull('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [Int256](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 256-bit integer value if successful, otherwise `NULL`. [Int256](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt256OrNull('-256'), + toInt256OrNull('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt256OrNull('-256'): -256 +toInt256OrNull('abc'): ᴺᵁᴸᴸ +``` + +**See also** + +- [`toInt256`](#toint256). +- [`toInt256OrZero`](#toint256orzero). +- [`toInt256OrDefault`](#toint256ordefault). + +## toInt256OrDefault + +Like [`toInt256`](#toint256), this function converts an input value to a value of type [Int256](../data-types/int-uint.md) but returns the default value in case of an error. +If no `default` value is passed then `0` is returned in case of an error. + +**Syntax** + +```sql +toInt256OrDefault(expr[, default]) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `default` (optional) — The default value to return if parsing to type `Int256` is unsuccessful. [Int256](../data-types/int-uint.md). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Arguments for which the default value is returned: +- String representations of Float32/64 values, including `NaN` and `Inf` +- String representations of binary and hexadecimal values, e.g. `SELECT toInt256OrDefault('0xc0fe', CAST('-1', 'Int256'));` + +:::note +If the input value cannot be represented within the bounds of [Int256](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 256-bit integer value if successful, otherwise returns the default value if passed or `0` if not. [Int256](../data-types/int-uint.md). + +:::note +- The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The default value type should be the same as the cast type. +::: + +**Example** + +Query: + +``` sql +SELECT + toInt256OrDefault('-256', CAST('-1', 'Int256')), + toInt256OrDefault('abc', CAST('-1', 'Int256')) +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toInt256OrDefault('-256', CAST('-1', 'Int256')): -256 +toInt256OrDefault('abc', CAST('-1', 'Int256')): -1 +``` + +**See also** + +- [`toInt256`](#toint256). +- [`toInt256OrZero`](#toint256orzero). +- [`toInt256OrNull`](#toint256ornull). + +# toUInt8 + +Converts an input value to a value of type [`UInt8`](../data-types/int-uint.md). Throws an exception in case of an error. + +**Syntax** + +```sql +toUInt8(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt8('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [UInt8](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +For example: `SELECT toUInt8(256) == 0;`. +::: + +**Returned value** + +- 8-bit unsigned integer value. [UInt8](../data-types/int-uint.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +```sql +SELECT + toUInt8(8), + toUInt8(8.8), + toUInt8('8') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt8(8): 8 +toUInt8(8.8): 8 +toUInt8('8'): 8 +``` + +**See also** + +- [`toUInt8OrZero`](#touint8orzero). +- [`toUInt8OrNull`](#touint8ornull). +- [`toUInt8OrDefault`](#touint8ordefault). + +## toUInt8OrZero + +Like [`toUInt8`](#touint8), this function converts an input value to a value of type [UInt8](../data-types/int-uint.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toUInt8OrZero(x) +``` + +**Arguments** + +- `x` — A String representation of a number. [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `0`): +- String representations of ordinary Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt8OrZero('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [UInt8](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 8-bit unsigned integer value if successful, otherwise `0`. [UInt8](../data-types/int-uint.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toUInt8OrZero('-8'), + toUInt8OrZero('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt8OrZero('-8'): 0 +toUInt8OrZero('abc'): 0 +``` + +**See also** + +- [`toUInt8`](#touint8). +- [`toUInt8OrNull`](#touint8ornull). +- [`toUInt8OrDefault`](#touint8ordefault). + +## toUInt8OrNull + +Like [`toUInt8`](#touint8), this function converts an input value to a value of type [UInt8](../data-types/int-uint.md) but returns `NULL` in case of an error. + +**Syntax** + +```sql +toUInt8OrNull(x) +``` + +**Arguments** + +- `x` — A String representation of a number. [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `\N`) +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt8OrNull('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [UInt8](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 8-bit unsigned integer value if successful, otherwise `NULL`. [UInt8](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toUInt8OrNull('8'), + toUInt8OrNull('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt8OrNull('8'): 8 +toUInt8OrNull('abc'): ᴺᵁᴸᴸ +``` + +**See also** + +- [`toUInt8`](#touint8). +- [`toUInt8OrZero`](#touint8orzero). +- [`toUInt8OrDefault`](#touint8ordefault). + +## toUInt8OrDefault + +Like [`toUInt8`](#touint8), this function converts an input value to a value of type [UInt8](../data-types/int-uint.md) but returns the default value in case of an error. +If no `default` value is passed then `0` is returned in case of an error. + +**Syntax** + +```sql +toUInt8OrDefault(expr[, default]) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `default` (optional) — The default value to return if parsing to type `UInt8` is unsuccessful. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Arguments for which the default value is returned: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt8OrDefault('0xc0fe', CAST('0', 'UInt8'));`. + +:::note +If the input value cannot be represented within the bounds of [UInt8](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 8-bit unsigned integer value if successful, otherwise returns the default value if passed or `0` if not. [UInt8](../data-types/int-uint.md). + +:::note +- The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The default value type should be the same as the cast type. +::: + +**Example** + +Query: + +``` sql +SELECT + toUInt8OrDefault('8', CAST('0', 'UInt8')), + toUInt8OrDefault('abc', CAST('0', 'UInt8')) +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt8OrDefault('8', CAST('0', 'UInt8')): 8 +toUInt8OrDefault('abc', CAST('0', 'UInt8')): 0 +``` + +**See also** + +- [`toUInt8`](#touint8). +- [`toUInt8OrZero`](#touint8orzero). +- [`toUInt8OrNull`](#touint8orNull). + +## toUInt16 + +Converts an input value to a value of type [`UInt16`](../data-types/int-uint.md). Throws an exception in case of an error. + +**Syntax** + +```sql +toUInt16(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt16('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [UInt16](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +For example: `SELECT toUInt16(65536) == 0;`. +::: + +**Returned value** + +- 16-bit unsigned integer value. [UInt16](../data-types/int-uint.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +```sql +SELECT + toUInt16(16), + toUInt16(16.16), + toUInt16('16') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt16(16): 16 +toUInt16(16.16): 16 +toUInt16('16'): 16 +``` + +**See also** + +- [`toUInt16OrZero`](#touint16orzero). +- [`toUInt16OrNull`](#touint16ornull). +- [`toUInt16OrDefault`](#touint16ordefault). + +## toUInt16OrZero + +Like [`toUInt16`](#touint16), this function converts an input value to a value of type [UInt16](../data-types/int-uint.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toUInt16OrZero(x) +``` + +**Arguments** + +- `x` — A String representation of a number. [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `0`): +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt16OrZero('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [UInt16](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered as an error. +::: + +**Returned value** + +- 16-bit unsigned integer value if successful, otherwise `0`. [UInt16](../data-types/int-uint.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toUInt16OrZero('16'), + toUInt16OrZero('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt16OrZero('16'): 16 +toUInt16OrZero('abc'): 0 +``` + +**See also** + +- [`toUInt16`](#touint16). +- [`toUInt16OrNull`](#touint16ornull). +- [`toUInt16OrDefault`](#touint16ordefault). + +## toUInt16OrNull + +Like [`toUInt16`](#touint16), this function converts an input value to a value of type [UInt16](../data-types/int-uint.md) but returns `NULL` in case of an error. + +**Syntax** + +```sql +toUInt16OrNull(x) +``` + +**Arguments** + +- `x` — A String representation of a number. [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `\N`) +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt16OrNull('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [UInt16](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 16-bit unsigned integer value if successful, otherwise `NULL`. [UInt16](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toUInt16OrNull('16'), + toUInt16OrNull('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt16OrNull('16'): 16 +toUInt16OrNull('abc'): ᴺᵁᴸᴸ +``` + +**See also** + +- [`toUInt16`](#touint16). +- [`toUInt16OrZero`](#touint16orzero). +- [`toUInt16OrDefault`](#touint16ordefault). + +## toUInt16OrDefault + +Like [`toUInt16`](#touint16), this function converts an input value to a value of type [UInt16](../data-types/int-uint.md) but returns the default value in case of an error. +If no `default` value is passed then `0` is returned in case of an error. + +**Syntax** + +```sql +toUInt16OrDefault(expr[, default]) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `default` (optional) — The default value to return if parsing to type `UInt16` is unsuccessful. [UInt16](../data-types/int-uint.md). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Arguments for which the default value is returned: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt16OrDefault('0xc0fe', CAST('0', 'UInt16'));`. + +:::note +If the input value cannot be represented within the bounds of [UInt16](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 16-bit unsigned integer value if successful, otherwise returns the default value if passed or `0` if not. [UInt16](../data-types/int-uint.md). + +:::note +- The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The default value type should be the same as the cast type. +::: + +**Example** + +Query: + +``` sql +SELECT + toUInt16OrDefault('16', CAST('0', 'UInt16')), + toUInt16OrDefault('abc', CAST('0', 'UInt16')) +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt16OrDefault('16', CAST('0', 'UInt16')): 16 +toUInt16OrDefault('abc', CAST('0', 'UInt16')): 0 +``` + +**See also** + +- [`toUInt16`](#touint16). +- [`toUInt16OrZero`](#touint16orzero). +- [`toUInt16OrNull`](#touint16ornull). + +## toUInt32 + +Converts an input value to a value of type [`UInt32`](../data-types/int-uint.md). Throws an exception in case of an error. + +**Syntax** + +```sql +toUInt32(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt32('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [UInt32](../data-types/int-uint.md), the result over or under flows. +This is not considered an error. +For example: `SELECT toUInt32(4294967296) == 0;` +::: + +**Returned value** + +- 32-bit unsigned integer value. [UInt32](../data-types/int-uint.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +```sql +SELECT + toUInt32(32), + toUInt32(32.32), + toUInt32('32') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt32(32): 32 +toUInt32(32.32): 32 +toUInt32('32'): 32 +``` + +**See also** + +- [`toUInt32OrZero`](#touint32orzero). +- [`toUInt32OrNull`](#touint32ornull). +- [`toUInt32OrDefault`](#touint32ordefault). + +## toUInt32OrZero + +Like [`toUInt32`](#touint32), this function converts an input value to a value of type [UInt32](../data-types/int-uint.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toUInt32OrZero(x) +``` + +**Arguments** + +- `x` — A String representation of a number. [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `0`): +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt32OrZero('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [UInt32](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 32-bit unsigned integer value if successful, otherwise `0`. [UInt32](../data-types/int-uint.md) + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero) +, meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toUInt32OrZero('32'), + toUInt32OrZero('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt32OrZero('32'): 32 +toUInt32OrZero('abc'): 0 +``` +**See also** + +- [`toUInt32`](#touint32). +- [`toUInt32OrNull`](#touint32ornull). +- [`toUInt32OrDefault`](#touint32ordefault). + +## toUInt32OrNull + +Like [`toUInt32`](#touint32), this function converts an input value to a value of type [UInt32](../data-types/int-uint.md) but returns `NULL` in case of an error. + +**Syntax** + +```sql +toUInt32OrNull(x) +``` + +**Arguments** + +- `x` — A String representation of a number. [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `\N`) +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt32OrNull('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [UInt32](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 32-bit unsigned integer value if successful, otherwise `NULL`. [UInt32](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero) +, meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toUInt32OrNull('32'), + toUInt32OrNull('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt32OrNull('32'): 32 +toUInt32OrNull('abc'): ᴺᵁᴸᴸ +``` + +**See also** + +- [`toUInt32`](#touint32). +- [`toUInt32OrZero`](#touint32orzero). +- [`toUInt32OrDefault`](#touint32ordefault). + +## toUInt32OrDefault + +Like [`toUInt32`](#touint32), this function converts an input value to a value of type [UInt32](../data-types/int-uint.md) but returns the default value in case of an error. +If no `default` value is passed then `0` is returned in case of an error. + +**Syntax** + +```sql +toUInt32OrDefault(expr[, default]) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `default` (optional) — The default value to return if parsing to type `UInt32` is unsuccessful. [UInt32](../data-types/int-uint.md). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Arguments for which the default value is returned: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt32OrDefault('0xc0fe', CAST('0', 'UInt32'));`. + +:::note +If the input value cannot be represented within the bounds of [UInt32](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 32-bit unsigned integer value if successful, otherwise returns the default value if passed or `0` if not. [UInt32](../data-types/int-uint.md). + +:::note +- The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The default value type should be the same as the cast type. +::: + +**Example** + +Query: + +``` sql +SELECT + toUInt32OrDefault('32', CAST('0', 'UInt32')), + toUInt32OrDefault('abc', CAST('0', 'UInt32')) +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt32OrDefault('32', CAST('0', 'UInt32')): 32 +toUInt32OrDefault('abc', CAST('0', 'UInt32')): 0 +``` + +**See also** + +- [`toUInt32`](#touint32). +- [`toUInt32OrZero`](#touint32orzero). +- [`toUInt32OrNull`](#touint32ornull). + +## toUInt64 + +Converts an input value to a value of type [`UInt64`](../data-types/int-uint.md). Throws an exception in case of an error. + +**Syntax** + +```sql +toUInt64(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Unsupported types: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt64('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [UInt64](../data-types/int-uint.md), the result over or under flows. +This is not considered an error. +For example: `SELECT toUInt64(18446744073709551616) == 0;` +::: + +**Returned value** + +- 64-bit unsigned integer value. [UInt64](../data-types/int-uint.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +```sql +SELECT + toUInt64(64), + toUInt64(64.64), + toUInt64('64') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt64(64): 64 +toUInt64(64.64): 64 +toUInt64('64'): 64 +``` + +**See also** + +- [`toUInt64OrZero`](#touint64orzero). +- [`toUInt64OrNull`](#touint64ornull). +- [`toUInt64OrDefault`](#touint64ordefault). + +## toInt64OrZero + +Like [`toUInt64`](#touint64), this function converts an input value to a value of type [UInt64](../data-types/int-uint.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toUInt64OrZero(x) +``` + +**Arguments** + +- `x` — A String representation of a number. [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `0`): +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt64OrZero('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [UInt64](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 64-bit unsigned integer value if successful, otherwise `0`. [UInt64](../data-types/int-uint.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toUInt64OrZero('64'), + toUInt64OrZero('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt64OrZero('64'): 64 +toUInt64OrZero('abc'): 0 +``` + +**See also** + +- [`toUInt64`](#touint64). +- [`toUInt64OrNull`](#touint64ornull). +- [`toUInt64OrDefault`](#touint64ordefault). + +## toUInt64OrNull + +Like [`toUInt64`](#touint64), this function converts an input value to a value of type [UInt64](../data-types/int-uint.md) but returns `NULL` in case of an error. + +**Syntax** + +```sql +toUInt64OrNull(x) +``` + +**Arguments** + +- `x` — A String representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `\N`) +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt64OrNull('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [UInt64](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 64-bit unsigned integer value if successful, otherwise `NULL`. [UInt64](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toUInt64OrNull('64'), + toUInt64OrNull('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt64OrNull('64'): 64 +toUInt64OrNull('abc'): ᴺᵁᴸᴸ +``` + +**See also** + +- [`toUInt64`](#touint64). +- [`toUInt64OrZero`](#touint64orzero). +- [`toUInt64OrDefault`](#touint64ordefault). + +## toUInt64OrDefault + +Like [`toUInt64`](#touint64), this function converts an input value to a value of type [UInt64](../data-types/int-uint.md) but returns the default value in case of an error. +If no `default` value is passed then `0` is returned in case of an error. + +**Syntax** + +```sql +toUInt64OrDefault(expr[, default]) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `defauult` (optional) — The default value to return if parsing to type `UInt64` is unsuccessful. [UInt64](../data-types/int-uint.md). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Arguments for which the default value is returned: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt64OrDefault('0xc0fe', CAST('0', 'UInt64'));`. + +:::note +If the input value cannot be represented within the bounds of [UInt64](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 64-bit unsigned integer value if successful, otherwise returns the default value if passed or `0` if not. [UInt64](../data-types/int-uint.md). + +:::note +- The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The default value type should be the same as the cast type. +::: + +**Example** + +Query: + +``` sql +SELECT + toUInt64OrDefault('64', CAST('0', 'UInt64')), + toUInt64OrDefault('abc', CAST('0', 'UInt64')) +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt64OrDefault('64', CAST('0', 'UInt64')): 64 +toUInt64OrDefault('abc', CAST('0', 'UInt64')): 0 +``` + +**See also** + +- [`toUInt64`](#touint64). +- [`toUInt64OrZero`](#touint64orzero). +- [`toUInt64OrNull`](#touint64ornull). + +## toUInt128 + +Converts an input value to a value of type [`UInt128`](../data-types/int-uint.md). Throws an exception in case of an error. + +**Syntax** + +```sql +toUInt128(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt128('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [UInt128](../data-types/int-uint.md), the result over or under flows. +This is not considered an error. +::: + +**Returned value** + +- 128-bit unsigned integer value. [UInt128](../data-types/int-uint.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +```sql +SELECT + toUInt128(128), + toUInt128(128.8), + toUInt128('128') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt128(128): 128 +toUInt128(128.8): 128 +toUInt128('128'): 128 +``` + +**See also** + +- [`toUInt128OrZero`](#touint128orzero). +- [`toUInt128OrNull`](#touint128ornull). +- [`toUInt128OrDefault`](#touint128ordefault). + +## toUInt128OrZero + +Like [`toUInt128`](#touint128), this function converts an input value to a value of type [UInt128](../data-types/int-uint.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toUInt128OrZero(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `0`): +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt128OrZero('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [UInt128](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 128-bit unsigned integer value if successful, otherwise `0`. [UInt128](../data-types/int-uint.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toUInt128OrZero('128'), + toUInt128OrZero('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt128OrZero('128'): 128 +toUInt128OrZero('abc'): 0 +``` + +**See also** + +- [`toUInt128`](#touint128). +- [`toUInt128OrNull`](#touint128ornull). +- [`toUInt128OrDefault`](#touint128ordefault). + +## toUInt128OrNull + +Like [`toUInt128`](#touint128), this function converts an input value to a value of type [UInt128](../data-types/int-uint.md) but returns `NULL` in case of an error. + +**Syntax** + +```sql +toUInt128OrNull(x) +``` + +**Arguments** + +- `x` — A String representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `\N`) +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt128OrNull('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [UInt128](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 128-bit unsigned integer value if successful, otherwise `NULL`. [UInt128](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toUInt128OrNull('128'), + toUInt128OrNull('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt128OrNull('128'): 128 +toUInt128OrNull('abc'): ᴺᵁᴸᴸ +``` + +**See also** + +- [`toUInt128`](#touint128). +- [`toUInt128OrZero`](#touint128orzero). +- [`toUInt128OrDefault`](#touint128ordefault). + +## toUInt128OrDefault + +Like [`toUInt128`](#toint128), this function converts an input value to a value of type [UInt128](../data-types/int-uint.md) but returns the default value in case of an error. +If no `default` value is passed then `0` is returned in case of an error. + +**Syntax** + +```sql +toUInt128OrDefault(expr[, default]) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `default` (optional) — The default value to return if parsing to type `UInt128` is unsuccessful. [UInt128](../data-types/int-uint.md). + +Supported arguments: +- (U)Int8/16/32/64/128/256. +- Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Arguments for which the default value is returned: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt128OrDefault('0xc0fe', CAST('0', 'UInt128'));`. + +:::note +If the input value cannot be represented within the bounds of [UInt128](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 128-bit unsigned integer value if successful, otherwise returns the default value if passed or `0` if not. [UInt128](../data-types/int-uint.md). + +:::note +- The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The default value type should be the same as the cast type. +::: + +**Example** + +Query: + +``` sql +SELECT + toUInt128OrDefault('128', CAST('0', 'UInt128')), + toUInt128OrDefault('abc', CAST('0', 'UInt128')) +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt128OrDefault('128', CAST('0', 'UInt128')): 128 +toUInt128OrDefault('abc', CAST('0', 'UInt128')): 0 +``` + +**See also** + +- [`toUInt128`](#touint128). +- [`toUInt128OrZero`](#touint128orzero). +- [`toUInt128OrNull`](#touint128ornull). + +## toUInt256 + +Converts an input value to a value of type [`UInt256`](../data-types/int-uint.md). Throws an exception in case of an error. + +**Syntax** + +```sql +toUInt256(expr) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments: +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt256('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [UInt256](../data-types/int-uint.md), the result over or under flows. +This is not considered an error. +::: + +**Returned value** + +- 256-bit unsigned integer value. [Int256](../data-types/int-uint.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +```sql +SELECT + toUInt256(256), + toUInt256(256.256), + toUInt256('256') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt256(256): 256 +toUInt256(256.256): 256 +toUInt256('256'): 256 +``` + +**See also** + +- [`toUInt256OrZero`](#touint256orzero). +- [`toUInt256OrNull`](#touint256ornull). +- [`toUInt256OrDefault`](#touint256ordefault). + +## toUInt256OrZero + +Like [`toUInt256`](#touint256), this function converts an input value to a value of type [UInt256](../data-types/int-uint.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toUInt256OrZero(x) +``` + +**Arguments** + +- `x` — A String representation of a number. [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `0`): +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt256OrZero('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [UInt256](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 256-bit unsigned integer value if successful, otherwise `0`. [UInt256](../data-types/int-uint.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toUInt256OrZero('256'), + toUInt256OrZero('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt256OrZero('256'): 256 +toUInt256OrZero('abc'): 0 +``` + +**See also** + +- [`toUInt256`](#touint256). +- [`toUInt256OrNull`](#touint256ornull). +- [`toUInt256OrDefault`](#touint256ordefault). + +## toUInt256OrNull + +Like [`toUInt256`](#touint256), this function converts an input value to a value of type [UInt256](../data-types/int-uint.md) but returns `NULL` in case of an error. + +**Syntax** + +```sql +toUInt256OrNull(x) +``` + +**Arguments** + +- `x` — A String representation of a number. [String](../data-types/string.md). + +Supported arguments: +- String representations of (U)Int8/16/32/128/256. + +Unsupported arguments (return `\N`) +- String representations of Float32/64 values, including `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt256OrNull('0xc0fe');`. + +:::note +If the input value cannot be represented within the bounds of [UInt256](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 256-bit unsigned integer value if successful, otherwise `NULL`. [UInt256](../data-types/int-uint.md) / [NULL](../data-types/nullable.md). + +:::note +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +::: + +**Example** + +Query: + +``` sql +SELECT + toUInt256OrNull('256'), + toUInt256OrNull('abc') +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt256OrNull('256'): 256 +toUInt256OrNull('abc'): ᴺᵁᴸᴸ +``` + +**See also** + +- [`toUInt256`](#touint256). +- [`toUInt256OrZero`](#touint256orzero). +- [`toUInt256OrDefault`](#touint256ordefault). + +## toUInt256OrDefault + +Like [`toUInt256`](#touint256), this function converts an input value to a value of type [UInt256](../data-types/int-uint.md) but returns the default value in case of an error. +If no `default` value is passed then `0` is returned in case of an error. + +**Syntax** + +```sql +toUInt256OrDefault(expr[, default]) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions) / [String](../data-types/string.md). +- `default` (optional) — The default value to return if parsing to type `UInt256` is unsuccessful. [UInt256](../data-types/int-uint.md). + +Supported arguments: +- Values of type (U)Int8/16/32/64/128/256. +- Values of type Float32/64. +- String representations of (U)Int8/16/32/128/256. + +Arguments for which the default value is returned: +- String representations of Float32/64 values, including `NaN` and `Inf` +- String representations of binary and hexadecimal values, e.g. `SELECT toUInt256OrDefault('0xc0fe', CAST('0', 'UInt256'));` + +:::note +If the input value cannot be represented within the bounds of [UInt256](../data-types/int-uint.md), overflow or underflow of the result occurs. +This is not considered an error. +::: + +**Returned value** + +- 256-bit unsigned integer value if successful, otherwise returns the default value if passed or `0` if not. [UInt256](../data-types/int-uint.md). + +:::note +- The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers. +- The default value type should be the same as the cast type. +::: + +**Example** + +Query: + +``` sql +SELECT + toUInt256OrDefault('-256', CAST('0', 'UInt256')), + toUInt256OrDefault('abc', CAST('0', 'UInt256')) +FORMAT vertical; +``` + +Result: + +```response +Row 1: +────── +toUInt256OrDefault('-256', CAST('0', 'UInt256')): 0 +toUInt256OrDefault('abc', CAST('0', 'UInt256')): 0 +``` + +**See also** + +- [`toUInt256`](#touint256). +- [`toUInt256OrZero`](#touint256orzero). +- [`toUInt256OrNull`](#touint256ornull). ## toFloat(32\|64) @@ -2289,7 +5141,7 @@ Result: └─────────────────────┴─────────────────┴─────────────────────────────────────┘ ``` -**See Also** +**See also** - [RFC 1123](https://datatracker.ietf.org/doc/html/rfc1123) - [toDate](#todate) diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index aa6f132e08e..2e9b0cf3080 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -36,7 +36,7 @@ These actions are described in detail below. ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after | FIRST] ``` -Adds a new column to the table with the specified `name`, `type`, [`codec`](../create/table.md/#codecs) and `default_expr` (see the section [Default expressions](/docs/en/sql-reference/statements/create/table.md/#create-default-values)). +Adds a new column to the table with the specified `name`, `type`, [`codec`](../create/table.md/#column_compression_codec) and `default_expr` (see the section [Default expressions](/docs/en/sql-reference/statements/create/table.md/#create-default-values)). If the `IF NOT EXISTS` clause is included, the query won’t return an error if the column already exists. If you specify `AFTER name_after` (the name of another column), the column is added after the specified one in the list of table columns. If you want to add a column to the beginning of the table use the `FIRST` clause. Otherwise, the column is added to the end of the table. For a chain of actions, `name_after` can be the name of a column that is added in one of the previous actions. @@ -155,7 +155,7 @@ This query changes the `name` column properties: - Column-level Settings -For examples of columns compression CODECS modifying, see [Column Compression Codecs](../create/table.md/#codecs). +For examples of columns compression CODECS modifying, see [Column Compression Codecs](../create/table.md/#column_compression_codec). For examples of columns TTL modifying, see [Column TTL](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#mergetree-column-ttl). diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index b866d0b9f5f..9c8984d698f 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -21,7 +21,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [COMMENT 'comment for column'] [compression_codec] [TTL expr2], ... ) ENGINE = engine - COMMENT 'comment for table' + [COMMENT 'comment for table'] ``` Creates a table named `table_name` in the `db` database or the current database if `db` is not set, with the structure specified in brackets and the `engine` engine. @@ -626,11 +626,6 @@ SELECT * FROM base.t1; You can add a comment to the table when you creating it. -:::note -The comment clause is supported by all table engines except [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) and [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md). -::: - - **Syntax** ``` sql diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 1fabb6d8cc7..2931f7020fb 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -16,6 +16,7 @@ Syntax: CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] [DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }] AS SELECT ... +[COMMENT 'comment'] ``` Normal views do not store any data. They just perform a read from another table on each access. In other words, a normal view is nothing more than a saved query. When reading from a view, this saved query is used as a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause. @@ -57,6 +58,7 @@ SELECT * FROM view(column1=value1, column2=value2 ...) CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] [DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }] AS SELECT ... +[COMMENT 'comment'] ``` :::tip @@ -161,6 +163,7 @@ RANDOMIZE FOR interval DEPENDS ON [db.]name [, [db.]name [, ...]] [TO[db.]name] [(columns)] [ENGINE = engine] [EMPTY] AS SELECT ... +[COMMENT 'comment'] ``` where `interval` is a sequence of simple intervals: ```sql @@ -267,7 +270,10 @@ This is an experimental feature that may change in backwards-incompatible ways i ::: ``` sql -CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [INNER ENGINE engine] [ENGINE engine] [WATERMARK strategy] [ALLOWED_LATENESS interval_function] [POPULATE] AS SELECT ... GROUP BY time_window_function +CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [INNER ENGINE engine] [ENGINE engine] [WATERMARK strategy] [ALLOWED_LATENESS interval_function] [POPULATE] +AS SELECT ... +GROUP BY time_window_function +[COMMENT 'comment'] ``` Window view can aggregate data by time window and output the results when the window is ready to fire. It stores the partial aggregation results in an inner(or specified) table to reduce latency and can push the processing result to a specified table or push notifications using the WATCH query. diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 34c6016235a..96d9d26977d 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -297,7 +297,7 @@ Algorithm requires the special column in tables. This column: - Must contain an ordered sequence. - Can be one of the following types: [Int, UInt](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal](../../../sql-reference/data-types/decimal.md). -- Can’t be the only column in the `JOIN` clause. +- For `hash` join algorithm it can’t be the only column in the `JOIN` clause. Syntax `ASOF JOIN ... ON`: @@ -337,7 +337,8 @@ For example, consider the following tables: `ASOF JOIN` can take the timestamp of a user event from `table_1` and find an event in `table_2` where the timestamp is closest to the timestamp of the event from `table_1` corresponding to the closest match condition. Equal timestamp values are the closest if available. Here, the `user_id` column can be used for joining on equality and the `ev_time` column can be used for joining on the closest match. In our example, `event_1_1` can be joined with `event_2_1` and `event_1_2` can be joined with `event_2_3`, but `event_2_2` can’t be joined. :::note -`ASOF` join is **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine. +`ASOF JOIN` is supported only by `hash` and `full_sorting_merge` join algorithms. +It's **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine. ::: ## PASTE JOIN Usage diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index e6d3439d2b9..35f2f15dd80 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -18,10 +18,21 @@ Reloads all dictionaries that have been successfully loaded before. By default, dictionaries are loaded lazily (see [dictionaries_lazy_load](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load)), so instead of being loaded automatically at startup, they are initialized on first access through dictGet function or SELECT from tables with ENGINE = Dictionary. The `SYSTEM RELOAD DICTIONARIES` query reloads such dictionaries (LOADED). Always returns `Ok.` regardless of the result of the dictionary update. +**Syntax** + +```sql +SYSTEM RELOAD DICTIONARIES [ON CLUSTER cluster_name] +``` + ## RELOAD DICTIONARY Completely reloads a dictionary `dictionary_name`, regardless of the state of the dictionary (LOADED / NOT_LOADED / FAILED). Always returns `Ok.` regardless of the result of updating the dictionary. + +``` sql +SYSTEM RELOAD DICTIONARY [ON CLUSTER cluster_name] dictionary_name +``` + The status of the dictionary can be checked by querying the `system.dictionaries` table. ``` sql diff --git a/docs/en/sql-reference/table-functions/fuzzQuery.md b/docs/en/sql-reference/table-functions/fuzzQuery.md deleted file mode 100644 index e15f8a40156..00000000000 --- a/docs/en/sql-reference/table-functions/fuzzQuery.md +++ /dev/null @@ -1,36 +0,0 @@ ---- -slug: /en/sql-reference/table-functions/fuzzQuery -sidebar_position: 75 -sidebar_label: fuzzQuery ---- - -# fuzzQuery - -Perturbs the given query string with random variations. - -``` sql -fuzzQuery(query[, max_query_length[, random_seed]]) -``` - -**Arguments** - -- `query` (String) - The source query to perform the fuzzing on. -- `max_query_length` (UInt64) - A maximum length the query can get during the fuzzing process. -- `random_seed` (UInt64) - A random seed for producing stable results. - -**Returned Value** - -A table object with a single column containing perturbed query strings. - -## Usage Example - -``` sql -SELECT * FROM fuzzQuery('SELECT materialize(\'a\' AS key) GROUP BY key') LIMIT 2; -``` - -``` - ┌─query──────────────────────────────────────────────────────────┐ -1. │ SELECT 'a' AS key GROUP BY key │ -2. │ EXPLAIN PIPELINE compact = true SELECT 'a' AS key GROUP BY key │ - └────────────────────────────────────────────────────────────────┘ -``` diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 887c5cb86bc..1d99d223ee9 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -209,8 +209,8 @@ std::vector Client::loadWarningMessages() {} /* query_parameters */, "" /* query_id */, QueryProcessingStage::Complete, - &global_context->getSettingsRef(), - &global_context->getClientInfo(), false, {}); + &client_context->getSettingsRef(), + &client_context->getClientInfo(), false, {}); while (true) { Packet packet = connection->receivePacket(); @@ -306,9 +306,6 @@ void Client::initialize(Poco::Util::Application & self) if (env_password && !config().has("password")) config().setString("password", env_password); - // global_context->setApplicationType(Context::ApplicationType::CLIENT); - global_context->setQueryParameters(query_parameters); - /// settings and limits could be specified in config file, but passed settings has higher priority for (const auto & setting : global_context->getSettingsRef().allUnchanged()) { @@ -382,7 +379,7 @@ try showWarnings(); /// Set user password complexity rules - auto & access_control = global_context->getAccessControl(); + auto & access_control = client_context->getAccessControl(); access_control.setPasswordComplexityRules(connection->getPasswordComplexityRules()); if (is_interactive && !delayed_interactive) @@ -459,7 +456,7 @@ void Client::connect() << connection_parameters.host << ":" << connection_parameters.port << (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") << "." << std::endl; - connection = Connection::createConnection(connection_parameters, global_context); + connection = Connection::createConnection(connection_parameters, client_context); if (max_client_network_bandwidth) { @@ -528,7 +525,7 @@ void Client::connect() } } - if (!global_context->getSettingsRef().use_client_time_zone) + if (!client_context->getSettingsRef().use_client_time_zone) { const auto & time_zone = connection->getServerTimezone(connection_parameters.timeouts); if (!time_zone.empty()) @@ -611,7 +608,7 @@ void Client::printChangedSettings() const } }; - print_changes(global_context->getSettingsRef().changes(), "settings"); + print_changes(client_context->getSettingsRef().changes(), "settings"); print_changes(cmd_merge_tree_settings.changes(), "MergeTree settings"); } @@ -709,7 +706,7 @@ bool Client::processWithFuzzing(const String & full_query) { const char * begin = full_query.data(); orig_ast = parseQuery(begin, begin + full_query.size(), - global_context->getSettingsRef(), + client_context->getSettingsRef(), /*allow_multi_statements=*/ true); } catch (const Exception & e) @@ -733,7 +730,7 @@ bool Client::processWithFuzzing(const String & full_query) } // Kusto is not a subject for fuzzing (yet) - if (global_context->getSettingsRef().dialect == DB::Dialect::kusto) + if (client_context->getSettingsRef().dialect == DB::Dialect::kusto) { return true; } @@ -1138,8 +1135,6 @@ void Client::processOptions(const OptionsDescription & options_description, if ((query_fuzzer_runs = options["query-fuzzer-runs"].as())) { - // Fuzzer implies multiquery. - config().setBool("multiquery", true); // Ignore errors in parsing queries. config().setBool("ignore-error", true); ignore_error = true; @@ -1147,8 +1142,6 @@ void Client::processOptions(const OptionsDescription & options_description, if ((create_query_fuzzer_runs = options["create-query-fuzzer-runs"].as())) { - // Fuzzer implies multiquery. - config().setBool("multiquery", true); // Ignore errors in parsing queries. config().setBool("ignore-error", true); @@ -1166,6 +1159,11 @@ void Client::processOptions(const OptionsDescription & options_description, if (options.count("opentelemetry-tracestate")) global_context->getClientTraceContext().tracestate = options["opentelemetry-tracestate"].as(); + + /// In case of clickhouse-client the `client_context` can be just an alias for the `global_context`. + /// (There is no need to copy the context because clickhouse-client has no background tasks so it won't use that context in parallel.) + client_context = global_context; + initClientContext(); } @@ -1199,17 +1197,9 @@ void Client::processConfig() } print_stack_trace = config().getBool("stacktrace", false); - if (config().has("multiquery")) - is_multiquery = true; - pager = config().getString("pager", ""); setDefaultFormatsAndCompressionFromConfiguration(); - - global_context->setClientName(std::string(DEFAULT_CLIENT_NAME)); - global_context->setQueryKindInitial(); - global_context->setQuotaClientKey(config().getString("quota_key", "")); - global_context->setQueryKind(query_kind); } @@ -1362,13 +1352,6 @@ void Client::readArguments( allow_repeated_settings = true; else if (arg == "--allow_merge_tree_settings") allow_merge_tree_settings = true; - else if (arg == "--multiquery" && (arg_num + 1) < argc && !std::string_view(argv[arg_num + 1]).starts_with('-')) - { - /// Transform the abbreviated syntax '--multiquery ' into the full syntax '--multiquery -q ' - ++arg_num; - arg = argv[arg_num]; - addMultiquery(arg, common_arguments); - } else if (arg == "--password" && ((arg_num + 1) >= argc || std::string_view(argv[arg_num + 1]).starts_with('-'))) { common_arguments.emplace_back(arg); diff --git a/programs/client/Client.h b/programs/client/Client.h index 6d57a6ea648..9571440d6ba 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -9,17 +9,13 @@ namespace DB class Client : public ClientBase { public: - Client() - { - fuzzer = QueryFuzzer(randomSeed(), &std::cout, &std::cerr); - } + Client() = default; void initialize(Poco::Util::Application & self) override; int main(const std::vector & /*args*/) override; protected: - Poco::Util::LayeredConfiguration & getClientConfiguration() override; bool processWithFuzzing(const String & full_query) override; diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 8cf1a4d1999..783f60cb8ff 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -311,6 +312,12 @@ try MainThreadStatus::getInstance(); + if (auto total_numa_memory = getNumaNodesTotalMemory(); total_numa_memory.has_value()) + { + LOG_INFO( + log, "Keeper is bound to a subset of NUMA nodes. Total memory of all available nodes: {}", ReadableSize(*total_numa_memory)); + } + #if !defined(NDEBUG) || !defined(__OPTIMIZE__) LOG_WARNING(log, "Keeper was built in debug mode. It will work slowly."); #endif diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 48e0cca7b73..88d5a0253d1 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -295,6 +295,8 @@ void LocalServer::cleanup() if (suggest) suggest.reset(); + client_context.reset(); + if (global_context) { global_context->shutdown(); @@ -436,7 +438,7 @@ void LocalServer::connect() in = input.get(); } connection = LocalConnection::createConnection( - connection_parameters, global_context, in, need_render_progress, need_render_profile_events, server_display_name); + connection_parameters, client_context, in, need_render_progress, need_render_profile_events, server_display_name); } @@ -497,8 +499,6 @@ try initTTYBuffer(toProgressOption(getClientConfiguration().getString("progress", "default"))); ASTAlterCommand::setFormatAlterCommandsWithParentheses(true); - applyCmdSettings(global_context); - /// try to load user defined executable functions, throw on error and die try { @@ -510,6 +510,11 @@ try throw; } + /// Must be called after we stopped initializing the global context and changing its settings. + /// After this point the global context must be stayed almost unchanged till shutdown, + /// and all necessary changes must be made to the client context instead. + createClientContext(); + if (is_interactive) { clearTerminal(); @@ -564,9 +569,6 @@ void LocalServer::processConfig() if (!queries.empty() && getClientConfiguration().has("queries-file")) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--query' and '--queries-file' cannot be specified at the same time"); - if (getClientConfiguration().has("multiquery")) - is_multiquery = true; - pager = getClientConfiguration().getString("pager", ""); delayed_interactive = getClientConfiguration().has("interactive") && (!queries.empty() || getClientConfiguration().has("queries-file")); @@ -735,6 +737,9 @@ void LocalServer::processConfig() /// Load global settings from default_profile and system_profile. global_context->setDefaultProfiles(getClientConfiguration()); + /// Command-line parameters can override settings from the default profile. + applyCmdSettings(global_context); + /// We load temporary database first, because projections need it. DatabaseCatalog::instance().initializeAndLoadTemporaryDatabase(); @@ -778,10 +783,6 @@ void LocalServer::processConfig() server_display_name = getClientConfiguration().getString("display_name", ""); prompt_by_server_display_name = getClientConfiguration().getRawString("prompt_by_server_display_name.default", ":) "); - - global_context->setQueryKindInitial(); - global_context->setQueryKind(query_kind); - global_context->setQueryParameters(query_parameters); } @@ -860,6 +861,16 @@ void LocalServer::applyCmdOptions(ContextMutablePtr context) } +void LocalServer::createClientContext() +{ + /// In case of clickhouse-local it's necessary to use a separate context for client-related purposes. + /// We can't just change the global context because it is used in background tasks (for example, in merges) + /// which don't expect that the global context can suddenly change. + client_context = Context::createCopy(global_context); + initClientContext(); +} + + void LocalServer::processOptions(const OptionsDescription &, const CommandLineOptions & options, const std::vector &, const std::vector &) { if (options.count("table")) @@ -922,13 +933,6 @@ void LocalServer::readArguments(int argc, char ** argv, Arguments & common_argum query_parameters.emplace(param_continuation.substr(0, equal_pos), param_continuation.substr(equal_pos + 1)); } } - else if (arg == "--multiquery" && (arg_num + 1) < argc && !std::string_view(argv[arg_num + 1]).starts_with('-')) - { - /// Transform the abbreviated syntax '--multiquery ' into the full syntax '--multiquery -q ' - ++arg_num; - arg = argv[arg_num]; - addMultiquery(arg, common_arguments); - } else { common_arguments.emplace_back(arg); diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index 0715f358313..ae9980311e1 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -31,7 +31,6 @@ public: int main(const std::vector & /*args*/) override; protected: - Poco::Util::LayeredConfiguration & getClientConfiguration() override; void connect() override; @@ -50,7 +49,6 @@ protected: void processConfig() override; void readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector &, std::vector &) override; - void updateLoggerLevel(const String & logs_level) override; private: @@ -67,6 +65,8 @@ private: void applyCmdOptions(ContextMutablePtr context); void applyCmdSettings(ContextMutablePtr context); + void createClientContext(); + ServerSettings server_settings; std::optional status; diff --git a/programs/odbc-bridge/tests/CMakeLists.txt b/programs/odbc-bridge/tests/CMakeLists.txt index edf364ea192..f1411dbb554 100644 --- a/programs/odbc-bridge/tests/CMakeLists.txt +++ b/programs/odbc-bridge/tests/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable (validate-odbc-connection-string validate-odbc-connection-string.cpp ../validateODBCConnectionString.cpp) -target_link_libraries (validate-odbc-connection-string PRIVATE clickhouse_common_io) +target_link_libraries (validate-odbc-connection-string PRIVATE clickhouse_common_io clickhouse_common_config) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 16888015f8b..86ebd3a3225 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -140,6 +141,7 @@ # include #endif + #include /// A minimal file used when the server is run without installation INCBIN(resource_embedded_xml, SOURCE_DIR "/programs/server/embedded.xml"); @@ -754,6 +756,12 @@ try setenv("OPENSSL_CONF", config_dir.c_str(), true); /// NOLINT } + if (auto total_numa_memory = getNumaNodesTotalMemory(); total_numa_memory.has_value()) + { + LOG_INFO( + log, "ClickHouse is bound to a subset of NUMA nodes. Total memory of all available nodes: {}", ReadableSize(*total_numa_memory)); + } + registerInterpreters(); registerFunctions(); registerAggregateFunctions(); diff --git a/programs/server/config.d/backups.xml b/programs/server/config.d/backups.xml new file mode 100644 index 00000000000..382794e1123 --- /dev/null +++ b/programs/server/config.d/backups.xml @@ -0,0 +1,13 @@ + + + + + local + /tmp/backups/ + + + + + backups + + diff --git a/programs/server/config.d/enable_keeper_map.xml b/programs/server/config.d/enable_keeper_map.xml new file mode 120000 index 00000000000..47c36159882 --- /dev/null +++ b/programs/server/config.d/enable_keeper_map.xml @@ -0,0 +1 @@ +../../../tests/config/config.d/enable_keeper_map.xml \ No newline at end of file diff --git a/programs/server/config.d/session_log.xml b/programs/server/config.d/session_log.xml new file mode 120000 index 00000000000..7678cc33129 --- /dev/null +++ b/programs/server/config.d/session_log.xml @@ -0,0 +1 @@ +../../../tests/config/config.d/session_log.xml \ No newline at end of file diff --git a/src/Access/Common/AccessRightsElement.cpp b/src/Access/Common/AccessRightsElement.cpp index 24ff4e7631b..63bda09a51b 100644 --- a/src/Access/Common/AccessRightsElement.cpp +++ b/src/Access/Common/AccessRightsElement.cpp @@ -224,7 +224,11 @@ void AccessRightsElement::replaceEmptyDatabase(const String & current_database) String AccessRightsElement::toString() const { return toStringImpl(*this, true); } String AccessRightsElement::toStringWithoutOptions() const { return toStringImpl(*this, false); } - +String AccessRightsElement::toStringForAccessTypeSource() const +{ + String result{access_flags.toKeywords().front()}; + return result + " ON *.*"; +} bool AccessRightsElements::empty() const { return std::all_of(begin(), end(), [](const AccessRightsElement & e) { return e.empty(); }); } diff --git a/src/Access/Common/AccessRightsElement.h b/src/Access/Common/AccessRightsElement.h index ba625fc43df..78e94e6f2e4 100644 --- a/src/Access/Common/AccessRightsElement.h +++ b/src/Access/Common/AccessRightsElement.h @@ -89,6 +89,7 @@ struct AccessRightsElement /// Returns a human-readable representation like "GRANT SELECT, UPDATE(x, y) ON db.table". String toString() const; String toStringWithoutOptions() const; + String toStringForAccessTypeSource() const; }; diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index a2807ecc5ea..e50521a0730 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -38,6 +38,24 @@ namespace ErrorCodes namespace { + const std::vector> source_and_table_engines = { + {AccessType::FILE, "File"}, + {AccessType::URL, "URL"}, + {AccessType::REMOTE, "Distributed"}, + {AccessType::MONGO, "MongoDB"}, + {AccessType::REDIS, "Redis"}, + {AccessType::MYSQL, "MySQL"}, + {AccessType::POSTGRES, "PostgreSQL"}, + {AccessType::SQLITE, "SQLite"}, + {AccessType::ODBC, "ODBC"}, + {AccessType::JDBC, "JDBC"}, + {AccessType::HDFS, "HDFS"}, + {AccessType::S3, "S3"}, + {AccessType::HIVE, "Hive"}, + {AccessType::AZURE, "AzureBlobStorage"} + }; + + AccessRights mixAccessRightsFromUserAndRoles(const User & user, const EnabledRolesInfo & roles_info) { AccessRights res = user.access; @@ -206,22 +224,6 @@ namespace } /// There is overlap between AccessType sources and table engines, so the following code avoids user granting twice. - static const std::vector> source_and_table_engines = { - {AccessType::FILE, "File"}, - {AccessType::URL, "URL"}, - {AccessType::REMOTE, "Distributed"}, - {AccessType::MONGO, "MongoDB"}, - {AccessType::REDIS, "Redis"}, - {AccessType::MYSQL, "MySQL"}, - {AccessType::POSTGRES, "PostgreSQL"}, - {AccessType::SQLITE, "SQLite"}, - {AccessType::ODBC, "ODBC"}, - {AccessType::JDBC, "JDBC"}, - {AccessType::HDFS, "HDFS"}, - {AccessType::S3, "S3"}, - {AccessType::HIVE, "Hive"}, - {AccessType::AZURE, "AzureBlobStorage"} - }; /// Sync SOURCE and TABLE_ENGINE, so only need to check TABLE_ENGINE later. if (access_control.doesTableEnginesRequireGrant()) @@ -267,6 +269,11 @@ namespace template std::string_view getDatabase(std::string_view arg1, const OtherArgs &...) { return arg1; } + + std::string_view getTableEngine() { return {}; } + + template + std::string_view getTableEngine(std::string_view arg1, const OtherArgs &...) { return arg1; } } @@ -620,18 +627,58 @@ bool ContextAccess::checkAccessImplHelper(const ContextPtr & context, AccessFlag if (!granted) { - if (grant_option && acs->isGranted(flags, args...)) + auto access_denied_no_grant = [&](AccessFlags access_flags, FmtArgs && ...fmt_args) { + if (grant_option && acs->isGranted(access_flags, fmt_args...)) + { + return access_denied(ErrorCodes::ACCESS_DENIED, + "{}: Not enough privileges. " + "The required privileges have been granted, but without grant option. " + "To execute this query, it's necessary to have the grant {} WITH GRANT OPTION", + AccessRightsElement{access_flags, fmt_args...}.toStringWithoutOptions()); + } + return access_denied(ErrorCodes::ACCESS_DENIED, - "{}: Not enough privileges. " - "The required privileges have been granted, but without grant option. " - "To execute this query, it's necessary to have the grant {} WITH GRANT OPTION", - AccessRightsElement{flags, args...}.toStringWithoutOptions()); + "{}: Not enough privileges. To execute this query, it's necessary to have the grant {}", + AccessRightsElement{access_flags, fmt_args...}.toStringWithoutOptions() + (grant_option ? " WITH GRANT OPTION" : "")); + }; + + /// As we check the SOURCES from the Table Engine logic, direct prompt about Table Engine would be misleading + /// since SOURCES is not granted actually. In order to solve this, turn the prompt logic back to Sources. + if (flags & AccessType::TABLE_ENGINE && !access_control->doesTableEnginesRequireGrant()) + { + AccessFlags new_flags; + + String table_engine_name{getTableEngine(args...)}; + for (const auto & source_and_table_engine : source_and_table_engines) + { + const auto & table_engine = std::get<1>(source_and_table_engine); + if (table_engine != table_engine_name) continue; + const auto & source = std::get<0>(source_and_table_engine); + /// Set the flags from Table Engine to SOURCES so that prompts can be meaningful. + new_flags = source; + break; + } + + /// Might happen in the case of grant Table Engine on A (but not source), then revoke A. + if (new_flags.isEmpty()) + return access_denied_no_grant(flags, args...); + + if (grant_option && acs->isGranted(flags, args...)) + { + return access_denied(ErrorCodes::ACCESS_DENIED, + "{}: Not enough privileges. " + "The required privileges have been granted, but without grant option. " + "To execute this query, it's necessary to have the grant {} WITH GRANT OPTION", + AccessRightsElement{new_flags}.toStringForAccessTypeSource()); + } + + return access_denied(ErrorCodes::ACCESS_DENIED, + "{}: Not enough privileges. To execute this query, it's necessary to have the grant {}", + AccessRightsElement{new_flags}.toStringForAccessTypeSource() + (grant_option ? " WITH GRANT OPTION" : "")); } - return access_denied(ErrorCodes::ACCESS_DENIED, - "{}: Not enough privileges. To execute this query, it's necessary to have the grant {}", - AccessRightsElement{flags, args...}.toStringWithoutOptions() + (grant_option ? " WITH GRANT OPTION" : "")); + return access_denied_no_grant(flags, args...); } struct PrecalculatedFlags diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp b/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp index 20acda213da..38f2fcb9fb9 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp @@ -1,12 +1,12 @@ -#include -#include - #include #include #include #include -#include +#include +#include +#include +#include #include #include @@ -15,18 +15,14 @@ #include #include -#include -#include - #include #include -#include #include -#include -#include -#include -#include +#include +#include + +#include namespace DB @@ -51,7 +47,7 @@ struct AggregateFunctionGroupArrayIntersectData }; -/// Puts all values to the hash set. Returns an array of unique values. Implemented for numeric types. +/// Puts all values to the hash set. Returns an array of unique values present in all inputs. Implemented for numeric types. template class AggregateFunctionGroupArrayIntersect : public IAggregateFunctionDataHelper, AggregateFunctionGroupArrayIntersect> @@ -69,7 +65,7 @@ public: : IAggregateFunctionDataHelper, AggregateFunctionGroupArrayIntersect>({argument_type}, parameters_, result_type_) {} - String getName() const override { return "GroupArrayIntersect"; } + String getName() const override { return "groupArrayIntersect"; } bool allocatesMemoryInArena() const override { return false; } @@ -158,7 +154,7 @@ public: set.reserve(size); for (size_t i = 0; i < size; ++i) { - int key; + T key; readIntBinary(key, buf); set.insert(key); } @@ -213,7 +209,7 @@ public: : IAggregateFunctionDataHelper>({input_data_type_}, parameters_, result_type_) , input_data_type(result_type_) {} - String getName() const override { return "GroupArrayIntersect"; } + String getName() const override { return "groupArrayIntersect"; } bool allocatesMemoryInArena() const override { return true; } @@ -240,7 +236,7 @@ public: { const char * begin = nullptr; StringRef serialized = data_column->serializeValueIntoArena(offset + i, *arena, begin); - assert(serialized.data != nullptr); + chassert(serialized.data != nullptr); set.emplace(SerializedKeyHolder{serialized, *arena}, it, inserted); } } @@ -260,7 +256,7 @@ public: { const char * begin = nullptr; StringRef serialized = data_column->serializeValueIntoArena(offset + i, *arena, begin); - assert(serialized.data != nullptr); + chassert(serialized.data != nullptr); it = set.find(serialized); if (it != nullptr) diff --git a/src/AggregateFunctions/SingleValueData.cpp b/src/AggregateFunctions/SingleValueData.cpp index a14caf00f73..11931acbbc8 100644 --- a/src/AggregateFunctions/SingleValueData.cpp +++ b/src/AggregateFunctions/SingleValueData.cpp @@ -195,7 +195,7 @@ bool SingleValueDataFixed::isEqualTo(const IColumn & column, size_t index) co template bool SingleValueDataFixed::isEqualTo(const SingleValueDataFixed & to) const { - return has() && to.value == value; + return has() && to.has() && to.value == value; } template @@ -904,6 +904,7 @@ bool SingleValueDataNumeric::isEqualTo(const DB::IColumn & column, size_t ind template bool SingleValueDataNumeric::isEqualTo(const DB::SingleValueDataBase & to) const { + /// to.has() is checked in memory.get().isEqualTo auto const & other = assert_cast(to); return memory.get().isEqualTo(other.memory.get()); } @@ -917,6 +918,7 @@ void SingleValueDataNumeric::set(const DB::IColumn & column, size_t row_num, template void SingleValueDataNumeric::set(const DB::SingleValueDataBase & to, DB::Arena * arena) { + /// to.has() is checked in memory.get().set auto const & other = assert_cast(to); return memory.get().set(other.memory.get(), arena); } @@ -924,6 +926,7 @@ void SingleValueDataNumeric::set(const DB::SingleValueDataBase & to, DB::Aren template bool SingleValueDataNumeric::setIfSmaller(const DB::SingleValueDataBase & to, DB::Arena * arena) { + /// to.has() is checked in memory.get().setIfSmaller auto const & other = assert_cast(to); return memory.get().setIfSmaller(other.memory.get(), arena); } @@ -931,6 +934,7 @@ bool SingleValueDataNumeric::setIfSmaller(const DB::SingleValueDataBase & to, template bool SingleValueDataNumeric::setIfGreater(const DB::SingleValueDataBase & to, DB::Arena * arena) { + /// to.has() is checked in memory.get().setIfGreater auto const & other = assert_cast(to); return memory.get().setIfGreater(other.memory.get(), arena); } @@ -1191,7 +1195,7 @@ bool SingleValueDataString::isEqualTo(const DB::IColumn & column, size_t row_num bool SingleValueDataString::isEqualTo(const SingleValueDataBase & other) const { auto const & to = assert_cast(other); - return has() && to.getStringRef() == getStringRef(); + return has() && to.has() && to.getStringRef() == getStringRef(); } void SingleValueDataString::set(const IColumn & column, size_t row_num, Arena * arena) @@ -1291,7 +1295,7 @@ bool SingleValueDataGeneric::isEqualTo(const IColumn & column, size_t row_num) c bool SingleValueDataGeneric::isEqualTo(const DB::SingleValueDataBase & other) const { auto const & to = assert_cast(other); - return has() && to.value == value; + return has() && to.has() && to.value == value; } void SingleValueDataGeneric::set(const IColumn & column, size_t row_num, Arena *) diff --git a/src/Analyzer/IQueryTreeNode.h b/src/Analyzer/IQueryTreeNode.h index df3687f8fd9..b36c1401798 100644 --- a/src/Analyzer/IQueryTreeNode.h +++ b/src/Analyzer/IQueryTreeNode.h @@ -49,7 +49,7 @@ enum class QueryTreeNodeType : uint8_t /// Convert query tree node type to string const char * toString(QueryTreeNodeType type); -/** Query tree is semantical representation of query. +/** Query tree is a semantic representation of query. * Query tree node represent node in query tree. * IQueryTreeNode is base class for all query tree nodes. * diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 5c68bca3a6e..e136440556f 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -68,10 +68,13 @@ QueryTreeNodePtr findEqualsFunction(const QueryTreeNodes & nodes) return nullptr; } -/// Checks if the node is combination of isNull and notEquals functions of two the same arguments +/// Checks if the node is combination of isNull and notEquals functions of two the same arguments: +/// [ (a <> b AND) ] (a IS NULL) AND (b IS NULL) bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs, QueryTreeNodePtr & rhs) { QueryTreeNodePtrWithHashSet all_arguments; + QueryTreeNodePtrWithHashSet is_null_arguments; + for (const auto & node : nodes) { const auto * func_node = node->as(); @@ -80,7 +83,11 @@ bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs, const auto & arguments = func_node->getArguments().getNodes(); if (func_node->getFunctionName() == "isNull" && arguments.size() == 1) + { all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0])); + is_null_arguments.insert(QueryTreeNodePtrWithHash(arguments[0])); + } + else if (func_node->getFunctionName() == "notEquals" && arguments.size() == 2) { if (arguments[0]->isEqual(*arguments[1])) @@ -95,7 +102,7 @@ bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs, return false; } - if (all_arguments.size() != 2) + if (all_arguments.size() != 2 || is_null_arguments.size() != 2) return false; lhs = all_arguments.begin()->node; diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index a62b6e56ac5..ed1227b0f00 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -268,6 +268,8 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q } } + const auto enable_order_by_all = updated_context->getSettingsRef().enable_order_by_all; + auto current_query_tree = std::make_shared(std::move(updated_context), std::move(settings_changes)); current_query_tree->setIsSubquery(is_subquery); @@ -281,7 +283,10 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q current_query_tree->setIsGroupByWithRollup(select_query_typed.group_by_with_rollup); current_query_tree->setIsGroupByWithGroupingSets(select_query_typed.group_by_with_grouping_sets); current_query_tree->setIsGroupByAll(select_query_typed.group_by_all); - current_query_tree->setIsOrderByAll(select_query_typed.order_by_all); + /// order_by_all flag in AST is set w/o consideration of `enable_order_by_all` setting + /// since SETTINGS section has not been parsed yet, - so, check the setting here + if (enable_order_by_all) + current_query_tree->setIsOrderByAll(select_query_typed.order_by_all); current_query_tree->setOriginalAST(select_query); auto current_context = current_query_tree->getContext(); diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index f7919b6422c..4443f83596f 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -62,7 +62,7 @@ namespace ErrorCodes namespace { -#if defined(ABORT_ON_LOGICAL_ERROR) +#if defined(DEBUG_OR_SANITIZER_BUILD) /** This visitor checks if Query Tree structure is valid after each pass * in debug build. @@ -183,7 +183,7 @@ void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node) for (size_t i = 0; i < passes_size; ++i) { passes[i]->run(query_tree_node, current_context); -#if defined(ABORT_ON_LOGICAL_ERROR) +#if defined(DEBUG_OR_SANITIZER_BUILD) ValidationChecker(passes[i]->getName()).visit(query_tree_node); #endif } @@ -208,7 +208,7 @@ void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node, size_t up_to_pa for (size_t i = 0; i < up_to_pass_index; ++i) { passes[i]->run(query_tree_node, current_context); -#if defined(ABORT_ON_LOGICAL_ERROR) +#if defined(DEBUG_OR_SANITIZER_BUILD) ValidationChecker(passes[i]->getName()).visit(query_tree_node); #endif } diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index a43e3e62ebc..51fe5ee6ec2 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -1740,7 +1740,7 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveQualifiedMatcher(Qu const auto * tuple_data_type = typeid_cast(result_type.get()); if (!tuple_data_type) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Qualified matcher {} find non compound expression {} with type {}. Expected tuple or array of tuples. In scope {}", + "Qualified matcher {} found a non-compound expression {} with type {}. Expected a tuple or an array of tuples. In scope {}", matcher_node->formatASTForErrorMessage(), expression_query_tree_node->formatASTForErrorMessage(), expression_query_tree_node->getResultType()->getName(), @@ -4124,9 +4124,7 @@ void QueryAnalyzer::resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpo auto * column_to_interpolate = interpolate_node_typed.getExpression()->as(); if (!column_to_interpolate) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "INTERPOLATE can work only for identifiers, but {} is found", + throw Exception(ErrorCodes::LOGICAL_ERROR, "INTERPOLATE can work only for indentifiers, but {} is found", interpolate_node_typed.getExpression()->formatASTForErrorMessage()); auto column_to_interpolate_name = column_to_interpolate->getIdentifier().getFullName(); diff --git a/src/Backups/BackupUtils.cpp b/src/Backups/BackupUtils.cpp index fa8ed5855dd..cd3f963b15d 100644 --- a/src/Backups/BackupUtils.cpp +++ b/src/Backups/BackupUtils.cpp @@ -105,7 +105,7 @@ bool compareRestoredTableDef(const IAST & restored_table_create_query, const IAS auto new_query = query.clone(); adjustCreateQueryForBackup(new_query, global_context); ASTCreateQuery & create = typeid_cast(*new_query); - create.setUUID({}); + create.resetUUIDs(); create.if_not_exists = false; return new_query; }; diff --git a/src/Backups/RestoreCoordinationLocal.cpp b/src/Backups/RestoreCoordinationLocal.cpp index f51d6c0c1d8..9fe22f874b4 100644 --- a/src/Backups/RestoreCoordinationLocal.cpp +++ b/src/Backups/RestoreCoordinationLocal.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -67,7 +68,7 @@ void RestoreCoordinationLocal::generateUUIDForTable(ASTCreateQuery & create_quer auto it = create_query_uuids.find(query_str); if (it != create_query_uuids.end()) { - create_query.setUUID(it->second); + it->second.copyToQuery(create_query); return true; } return false; @@ -79,7 +80,8 @@ void RestoreCoordinationLocal::generateUUIDForTable(ASTCreateQuery & create_quer return; } - auto new_uuids = create_query.generateRandomUUID(/* always_generate_new_uuid= */ true); + CreateQueryUUIDs new_uuids{create_query, /* generate_random= */ true, /* force_random= */ true}; + new_uuids.copyToQuery(create_query); { std::lock_guard lock{mutex}; diff --git a/src/Backups/RestoreCoordinationLocal.h b/src/Backups/RestoreCoordinationLocal.h index 5e51b719d63..35f93574b68 100644 --- a/src/Backups/RestoreCoordinationLocal.h +++ b/src/Backups/RestoreCoordinationLocal.h @@ -1,16 +1,17 @@ #pragma once #include -#include +#include +#include #include #include #include -namespace Poco { class Logger; } - namespace DB { +class ASTCreateQuery; + /// Implementation of the IRestoreCoordination interface performing coordination in memory. class RestoreCoordinationLocal : public IRestoreCoordination @@ -55,7 +56,7 @@ private: std::set> acquired_tables_in_replicated_databases; std::unordered_set acquired_data_in_replicated_tables; - std::unordered_map create_query_uuids; + std::unordered_map create_query_uuids; std::unordered_set acquired_data_in_keeper_map_tables; mutable std::mutex mutex; diff --git a/src/Backups/RestoreCoordinationRemote.cpp b/src/Backups/RestoreCoordinationRemote.cpp index 84106737fc9..44214d00be5 100644 --- a/src/Backups/RestoreCoordinationRemote.cpp +++ b/src/Backups/RestoreCoordinationRemote.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -269,7 +270,8 @@ bool RestoreCoordinationRemote::acquireInsertingDataForKeeperMap(const String & void RestoreCoordinationRemote::generateUUIDForTable(ASTCreateQuery & create_query) { String query_str = serializeAST(create_query); - String new_uuids_str = create_query.generateRandomUUID(/* always_generate_new_uuid= */ true).toString(); + CreateQueryUUIDs new_uuids{create_query, /* generate_random= */ true, /* force_random= */ true}; + String new_uuids_str = new_uuids.toString(); auto holder = with_retries.createRetriesControlHolder("generateUUIDForTable"); holder.retries_ctl.retryLoop( @@ -281,11 +283,14 @@ void RestoreCoordinationRemote::generateUUIDForTable(ASTCreateQuery & create_que Coordination::Error res = zk->tryCreate(path, new_uuids_str, zkutil::CreateMode::Persistent); if (res == Coordination::Error::ZOK) + { + new_uuids.copyToQuery(create_query); return; + } if (res == Coordination::Error::ZNODEEXISTS) { - create_query.setUUID(ASTCreateQuery::UUIDs::fromString(zk->get(path))); + CreateQueryUUIDs::fromString(zk->get(path)).copyToQuery(create_query); return; } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index dac20aaca48..8c133971785 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -226,6 +226,9 @@ add_object_library(clickhouse_storages_windowview Storages/WindowView) add_object_library(clickhouse_storages_s3queue Storages/ObjectStorageQueue) add_object_library(clickhouse_storages_materializedview Storages/MaterializedView) add_object_library(clickhouse_client Client) +# Always compile this file with the highest possible level of optimizations, even in Debug builds. +# https://github.com/ClickHouse/ClickHouse/issues/65745 +set_source_files_properties(Client/ClientBaseOptimizedParts.cpp PROPERTIES COMPILE_FLAGS "-O3") add_object_library(clickhouse_bridge BridgeHelper) add_object_library(clickhouse_server Server) add_object_library(clickhouse_server_http Server/HTTP) @@ -543,7 +546,7 @@ if (TARGET ch_contrib::libpqxx) endif() if (TARGET ch_contrib::datasketches) - target_link_libraries (clickhouse_aggregate_functions PRIVATE ch_contrib::datasketches) + dbms_target_link_libraries(PUBLIC ch_contrib::datasketches) endif () target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::lz4) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 7af199131b6..a88eed25db1 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -70,7 +70,6 @@ #include #include -#include #include #include #include @@ -80,6 +79,8 @@ #include #include "config.h" +#include +#include #if USE_GWP_ASAN # include @@ -108,7 +109,6 @@ namespace ErrorCodes extern const int UNEXPECTED_PACKET_FROM_SERVER; extern const int INVALID_USAGE_OF_INPUT; extern const int CANNOT_SET_SIGNAL_HANDLER; - extern const int UNRECOGNIZED_ARGUMENTS; extern const int LOGICAL_ERROR; extern const int CANNOT_OPEN_FILE; extern const int FILE_ALREADY_EXISTS; @@ -309,9 +309,16 @@ public: ClientBase::~ClientBase() { - writeSignalIDtoSignalPipe(SignalListener::StopThread); - signal_listener_thread.join(); - HandledSignals::instance().reset(); + try + { + writeSignalIDtoSignalPipe(SignalListener::StopThread); + signal_listener_thread.join(); + HandledSignals::instance().reset(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } } ClientBase::ClientBase( @@ -471,7 +478,7 @@ void ClientBase::sendExternalTables(ASTPtr parsed_query) std::vector data; for (auto & table : external_tables) - data.emplace_back(table.getData(global_context)); + data.emplace_back(table.getData(client_context)); connection->sendExternalTablesData(data); } @@ -684,10 +691,10 @@ try /// intermixed with data with parallel formatting. /// It may increase code complexity significantly. if (!extras_into_stdout || select_only_into_file) - output_format = global_context->getOutputFormatParallelIfPossible( + output_format = client_context->getOutputFormatParallelIfPossible( current_format, out_file_buf ? *out_file_buf : *out_buf, block); else - output_format = global_context->getOutputFormat( + output_format = client_context->getOutputFormat( current_format, out_file_buf ? *out_file_buf : *out_buf, block); output_format->setAutoFlush(); @@ -739,14 +746,6 @@ void ClientBase::adjustSettings() /// NOTE: Do not forget to set changed=false to avoid sending it to the server (to avoid breakage read only profiles) - /// In case of multi-query we allow data after semicolon since it will be - /// parsed by the client and interpreted as new query - if (is_multiquery && !global_context->getSettingsRef().input_format_values_allow_data_after_semicolon.changed) - { - settings.input_format_values_allow_data_after_semicolon = true; - settings.input_format_values_allow_data_after_semicolon.changed = false; - } - /// Do not limit pretty format output in case of --pager specified or in case of stdout is not a tty. if (!pager.empty() || !stdout_is_a_tty) { @@ -766,6 +765,15 @@ void ClientBase::adjustSettings() global_context->setSettings(settings); } +void ClientBase::initClientContext() +{ + client_context->setClientName(std::string(DEFAULT_CLIENT_NAME)); + client_context->setQuotaClientKey(getClientConfiguration().getString("quota_key", "")); + client_context->setQueryKindInitial(); + client_context->setQueryKind(query_kind); + client_context->setQueryParameters(query_parameters); +} + bool ClientBase::isRegularFile(int fd) { struct stat file_stat; @@ -956,7 +964,7 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) /// client-side. Thus we need to parse the query. const char * begin = full_query.data(); auto parsed_query = parseQuery(begin, begin + full_query.size(), - global_context->getSettingsRef(), + client_context->getSettingsRef(), /*allow_multi_statements=*/ false); if (!parsed_query) @@ -979,7 +987,7 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) /// But for asynchronous inserts we don't extract data, because it's needed /// to be done on server side in that case (for coalescing the data from multiple inserts on server side). const auto * insert = parsed_query->as(); - if (insert && isSyncInsertWithData(*insert, global_context)) + if (insert && isSyncInsertWithData(*insert, client_context)) query_to_execute = full_query.substr(0, insert->data - full_query.data()); else query_to_execute = full_query; @@ -1097,7 +1105,7 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa } } - const auto & settings = global_context->getSettingsRef(); + const auto & settings = client_context->getSettingsRef(); const Int32 signals_before_stop = settings.partial_result_on_first_cancel ? 2 : 1; int retries_left = 10; @@ -1112,10 +1120,10 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa connection_parameters.timeouts, query, query_parameters, - global_context->getCurrentQueryId(), + client_context->getCurrentQueryId(), query_processing_stage, - &global_context->getSettingsRef(), - &global_context->getClientInfo(), + &client_context->getSettingsRef(), + &client_context->getClientInfo(), true, [&](const Progress & progress) { onProgress(progress); }); @@ -1302,7 +1310,7 @@ void ClientBase::onProgress(const Progress & value) void ClientBase::onTimezoneUpdate(const String & tz) { - global_context->setSetting("session_timezone", tz); + client_context->setSetting("session_timezone", tz); } @@ -1498,25 +1506,18 @@ bool ClientBase::receiveSampleBlock(Block & out, ColumnsDescription & columns_de void ClientBase::setInsertionTable(const ASTInsertQuery & insert_query) { - if (!global_context->hasInsertionTable() && insert_query.table) + if (!client_context->hasInsertionTable() && insert_query.table) { String table = insert_query.table->as().shortName(); if (!table.empty()) { String database = insert_query.database ? insert_query.database->as().shortName() : ""; - global_context->setInsertionTable(StorageID(database, table)); + client_context->setInsertionTable(StorageID(database, table)); } } } -void ClientBase::addMultiquery(std::string_view query, Arguments & common_arguments) const -{ - common_arguments.emplace_back("--multiquery"); - common_arguments.emplace_back("-q"); - common_arguments.emplace_back(query); -} - namespace { bool isStdinNotEmptyAndValid(ReadBufferFromFileDescriptor & std_in) @@ -1555,7 +1556,7 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars const auto & parsed_insert_query = parsed_query->as(); if ((!parsed_insert_query.data && !parsed_insert_query.infile) && (is_interactive || (!stdin_is_a_tty && !isStdinNotEmptyAndValid(std_in)))) { - const auto & settings = global_context->getSettingsRef(); + const auto & settings = client_context->getSettingsRef(); if (settings.throw_if_no_data_to_insert) throw Exception(ErrorCodes::NO_DATA_TO_INSERT, "No data to insert"); else @@ -1569,10 +1570,10 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars connection_parameters.timeouts, query, query_parameters, - global_context->getCurrentQueryId(), + client_context->getCurrentQueryId(), query_processing_stage, - &global_context->getSettingsRef(), - &global_context->getClientInfo(), + &client_context->getSettingsRef(), + &client_context->getClientInfo(), true, [&](const Progress & progress) { onProgress(progress); }); @@ -1620,7 +1621,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des /// Set callback to be called on file progress. if (tty_buf) - progress_indication.setFileProgressCallback(global_context, *tty_buf); + progress_indication.setFileProgressCallback(client_context, *tty_buf); } /// If data fetched from file (maybe compressed file) @@ -1654,10 +1655,10 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des } StorageFile::CommonArguments args{ - WithContext(global_context), + WithContext(client_context), parsed_insert_query->table_id, current_format, - getFormatSettings(global_context), + getFormatSettings(client_context), compression_method, columns_for_storage_file, ConstraintsDescription{}, @@ -1665,7 +1666,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des {}, String{}, }; - StoragePtr storage = std::make_shared(in_file, global_context->getUserFilesPath(), args); + StoragePtr storage = std::make_shared(in_file, client_context->getUserFilesPath(), args); storage->startup(); SelectQueryInfo query_info; @@ -1676,16 +1677,16 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des storage->read( plan, sample.getNames(), - storage->getStorageSnapshot(metadata, global_context), + storage->getStorageSnapshot(metadata, client_context), query_info, - global_context, + client_context, {}, - global_context->getSettingsRef().max_block_size, + client_context->getSettingsRef().max_block_size, getNumberOfPhysicalCPUCores()); auto builder = plan.buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(global_context), - BuildQueryPipelineSettings::fromContext(global_context)); + QueryPlanOptimizationSettings::fromContext(client_context), + BuildQueryPipelineSettings::fromContext(client_context)); QueryPlanResourceHolder resources; auto pipe = QueryPipelineBuilder::getPipe(std::move(*builder), resources); @@ -1746,14 +1747,14 @@ void ClientBase::sendDataFrom(ReadBuffer & buf, Block & sample, const ColumnsDes current_format = insert->format; } - auto source = global_context->getInputFormat(current_format, buf, sample, insert_format_max_block_size); + auto source = client_context->getInputFormat(current_format, buf, sample, insert_format_max_block_size); Pipe pipe(source); if (columns_description.hasDefaults()) { pipe.addSimpleTransform([&](const Block & header) { - return std::make_shared(header, columns_description, *source, global_context); + return std::make_shared(header, columns_description, *source, client_context); }); } @@ -1915,12 +1916,12 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin if (is_interactive) { - global_context->setCurrentQueryId(""); + client_context->setCurrentQueryId(""); // Generate a new query_id for (const auto & query_id_format : query_id_formats) { writeString(query_id_format.first, std_out); - writeString(fmt::format(fmt::runtime(query_id_format.second), fmt::arg("query_id", global_context->getCurrentQueryId())), std_out); + writeString(fmt::format(fmt::runtime(query_id_format.second), fmt::arg("query_id", client_context->getCurrentQueryId())), std_out); writeChar('\n', std_out); std_out.next(); } @@ -1947,7 +1948,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin auto password = auth_data->getPassword(); if (password) - global_context->getAccessControl().checkPasswordComplexityRules(*password); + client_context->getAccessControl().checkPasswordComplexityRules(*password); } } } @@ -1962,15 +1963,15 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin std::optional old_settings; SCOPE_EXIT_SAFE({ if (old_settings) - global_context->setSettings(*old_settings); + client_context->setSettings(*old_settings); }); auto apply_query_settings = [&](const IAST & settings_ast) { if (!old_settings) - old_settings.emplace(global_context->getSettingsRef()); - global_context->applySettingsChanges(settings_ast.as()->changes); - global_context->resetSettingsToDefaultValue(settings_ast.as()->default_settings); + old_settings.emplace(client_context->getSettingsRef()); + client_context->applySettingsChanges(settings_ast.as()->changes); + client_context->resetSettingsToDefaultValue(settings_ast.as()->default_settings); }; const auto * insert = parsed_query->as(); @@ -2003,7 +2004,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin if (insert && insert->select) insert->tryFindInputFunction(input_function); - bool is_async_insert_with_inlined_data = global_context->getSettingsRef().async_insert && insert && insert->hasInlinedData(); + bool is_async_insert_with_inlined_data = client_context->getSettingsRef().async_insert && insert && insert->hasInlinedData(); if (is_async_insert_with_inlined_data) { @@ -2038,9 +2039,9 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin if (change.name == "profile") current_profile = change.value.safeGet(); else - global_context->applySettingChange(change); + client_context->applySettingChange(change); } - global_context->resetSettingsToDefaultValue(set_query->default_settings); + client_context->resetSettingsToDefaultValue(set_query->default_settings); /// Query parameters inside SET queries should be also saved on the client side /// to override their previous definitions set with --param_* arguments @@ -2048,7 +2049,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin for (const auto & [name, value] : set_query->query_parameters) query_parameters.insert_or_assign(name, value); - global_context->addQueryParameters(NameToNameMap{set_query->query_parameters.begin(), set_query->query_parameters.end()}); + client_context->addQueryParameters(NameToNameMap{set_query->query_parameters.begin(), set_query->query_parameters.end()}); } if (const auto * use_query = parsed_query->as()) { @@ -2125,8 +2126,8 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( if (this_query_begin >= all_queries_end) return MultiQueryProcessingStage::QUERIES_END; - unsigned max_parser_depth = static_cast(global_context->getSettingsRef().max_parser_depth); - unsigned max_parser_backtracks = static_cast(global_context->getSettingsRef().max_parser_backtracks); + unsigned max_parser_depth = static_cast(client_context->getSettingsRef().max_parser_depth); + unsigned max_parser_backtracks = static_cast(client_context->getSettingsRef().max_parser_backtracks); // If there are only comments left until the end of file, we just // stop. The parser can't handle this situation because it always @@ -2146,7 +2147,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( try { parsed_query = parseQuery(this_query_end, all_queries_end, - global_context->getSettingsRef(), + client_context->getSettingsRef(), /*allow_multi_statements=*/ true); } catch (const Exception & e) @@ -2171,25 +2172,50 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( return MultiQueryProcessingStage::PARSING_FAILED; } - // INSERT queries may have the inserted data in the query text - // that follow the query itself, e.g. "insert into t format CSV 1;2". - // They need special handling. First of all, here we find where the - // inserted data ends. In multi-query mode, it is delimited by a - // newline. - // The VALUES format needs even more handling - we also allow the - // data to be delimited by semicolon. This case is handled later by - // the format parser itself. - // We can't do multiline INSERTs with inline data, because most - // row input formats (e.g. TSV) can't tell when the input stops, - // unlike VALUES. + // INSERT queries may have the inserted data in the query text that follow the query itself, e.g. "insert into t format CSV 1,2". They + // need special handling. + // - If the INSERT statement FORMAT is VALUES, we use the VALUES format parser to skip the inserted data until we reach the trailing single semicolon. + // - Other formats (e.g. FORMAT CSV) are arbitrarily more complex and tricky to parse. For example, we may be unable to distinguish if the semicolon + // is part of the data or ends the statement. In this case, we simply assume that the end of the INSERT statement is determined by \n\n (two newlines). auto * insert_ast = parsed_query->as(); const char * query_to_execute_end = this_query_end; - if (insert_ast && insert_ast->data) { - this_query_end = find_first_symbols<'\n'>(insert_ast->data, all_queries_end); + if (insert_ast->format == "Values") + { + // Invoke the VALUES format parser to skip the inserted data + ReadBufferFromMemory data_in(insert_ast->data, all_queries_end - insert_ast->data); + skipBOMIfExists(data_in); + do + { + skipWhitespaceIfAny(data_in); + if (data_in.eof() || *data_in.position() == ';') + break; + } + while (ValuesBlockInputFormat::skipToNextRow(&data_in, 1, 0)); + // Handle the case of a comment followed by a semicolon + // Example: INSERT INTO tab VALUES xx; -- {serverError xx} + // If we use this error hint, the next query should not be placed on the same line + this_query_end = insert_ast->data + data_in.count(); + const auto * pos_newline = find_first_symbols<'\n'>(this_query_end, all_queries_end); + if (pos_newline != this_query_end) + { + TestHint hint(String(this_query_end, pos_newline - this_query_end)); + if (hint.hasClientErrors() || hint.hasServerErrors()) + this_query_end = pos_newline; + } + } + else + { + // Handling of generic formats + auto pos_newline = String(insert_ast->data, all_queries_end).find("\n\n"); + if (pos_newline != std::string::npos) + this_query_end = insert_ast->data + pos_newline; + else + this_query_end = all_queries_end; + } insert_ast->end = this_query_end; - query_to_execute_end = isSyncInsertWithData(*insert_ast, global_context) ? insert_ast->data : this_query_end; + query_to_execute_end = isSyncInsertWithData(*insert_ast, client_context) ? insert_ast->data : this_query_end; } query_to_execute = all_queries_text.substr(this_query_begin - all_queries_text.data(), query_to_execute_end - this_query_begin); @@ -2222,7 +2248,10 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) size_t test_tags_length = getTestTagsLength(all_queries_text); /// Several queries separated by ';'. - /// INSERT data is ended by the end of line, not ';'. + /// INSERT data is ended by the empty line (\n\n), not ';'. + /// Unnecessary semicolons may cause data to be parsed containing ';' + /// e.g. 'insert into xx format csv val;' will insert "val;" instead of "val" + /// 'insert into xx format csv val\n;' will insert "val" and ";" /// An exception is VALUES format where we also support semicolon in /// addition to end of line. const char * this_query_begin = all_queries_text.data() + test_tags_length; @@ -2233,6 +2262,8 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) String query_to_execute; ASTPtr parsed_query; std::unique_ptr current_exception; + size_t retries_count = 0; + bool is_first = true; while (true) { @@ -2241,16 +2272,24 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) switch (stage) { case MultiQueryProcessingStage::QUERIES_END: + { + /// Compatible with old version when run interactive, e.g. "", "\ld" + if (is_first && is_interactive) + processTextAsSingleQuery(all_queries_text); + return true; + } case MultiQueryProcessingStage::PARSING_FAILED: { return true; } case MultiQueryProcessingStage::CONTINUE_PARSING: { + is_first = false; continue; } case MultiQueryProcessingStage::PARSING_EXCEPTION: { + is_first = false; this_query_end = find_first_symbols<'\n'>(this_query_end, all_queries_end); // Try to find test hint for syntax error. We don't know where @@ -2280,6 +2319,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) } case MultiQueryProcessingStage::EXECUTE_QUERY: { + is_first = false; full_query = all_queries_text.substr(this_query_begin - all_queries_text.data(), this_query_end - this_query_begin); if (query_fuzzer_runs) { @@ -2289,6 +2329,8 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) this_query_begin = this_query_end; continue; } + if (suggest) + updateSuggest(parsed_query); // Now we know for sure where the query ends. // Look for the hint in the text of query + insert data + trailing @@ -2314,7 +2356,12 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) // Check whether the error (or its absence) matches the test hints // (or their absence). bool error_matches_hint = true; - if (have_error) + bool need_retry = test_hint.needRetry(server_exception, &retries_count); + if (need_retry) + { + std::this_thread::sleep_for(std::chrono::seconds(1)); + } + else if (have_error) { if (test_hint.hasServerErrors()) { @@ -2391,13 +2438,13 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) // , where the inline data is delimited by semicolon and not by a // newline. auto * insert_ast = parsed_query->as(); - if (insert_ast && isSyncInsertWithData(*insert_ast, global_context)) + if (insert_ast && isSyncInsertWithData(*insert_ast, client_context)) { this_query_end = insert_ast->end; adjustQueryEnd( this_query_end, all_queries_end, - static_cast(global_context->getSettingsRef().max_parser_depth), - static_cast(global_context->getSettingsRef().max_parser_backtracks)); + static_cast(client_context->getSettingsRef().max_parser_depth), + static_cast(client_context->getSettingsRef().max_parser_backtracks)); } // Report error. @@ -2408,7 +2455,8 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) if (have_error && !ignore_error) return is_interactive; - this_query_begin = this_query_end; + if (!need_retry) + this_query_begin = this_query_end; break; } } @@ -2433,14 +2481,6 @@ bool ClientBase::processQueryText(const String & text) return processMultiQueryFromFile(file_name); } - if (!is_multiquery) - { - assert(!query_fuzzer_runs); - processTextAsSingleQuery(text); - - return true; - } - if (query_fuzzer_runs) { processWithFuzzing(text); @@ -2527,10 +2567,10 @@ void ClientBase::runInteractive() if (load_suggestions) { /// Load suggestion data from the server. - if (global_context->getApplicationType() == Context::ApplicationType::CLIENT) - suggest->load(global_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load); - else if (global_context->getApplicationType() == Context::ApplicationType::LOCAL) - suggest->load(global_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load); + if (client_context->getApplicationType() == Context::ApplicationType::CLIENT) + suggest->load(client_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load); + else if (client_context->getApplicationType() == Context::ApplicationType::LOCAL) + suggest->load(client_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load); } if (home_path.empty()) @@ -2668,7 +2708,7 @@ void ClientBase::runInteractive() { // If a separate connection loading suggestions failed to open a new session, // use the main session to receive them. - suggest->load(*connection, connection_parameters.timeouts, getClientConfiguration().getInt("suggestion_limit"), global_context->getClientInfo()); + suggest->load(*connection, connection_parameters.timeouts, getClientConfiguration().getInt("suggestion_limit"), client_context->getClientInfo()); } try @@ -2717,10 +2757,10 @@ bool ClientBase::processMultiQueryFromFile(const String & file_name) if (!getClientConfiguration().has("log_comment")) { - Settings settings = global_context->getSettings(); + Settings settings = client_context->getSettings(); /// NOTE: cannot use even weakly_canonical() since it fails for /dev/stdin due to resolving of "pipe:[X]" settings.log_comment = fs::absolute(fs::path(file_name)); - global_context->setSettings(settings); + client_context->setSettings(settings); } return executeMultiQuery(queries_from_file); @@ -2833,168 +2873,6 @@ void ClientBase::showClientVersion() output_stream << VERSION_NAME << " " + getName() + " version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; } -namespace -{ - -/// Define transparent hash to we can use -/// std::string_view with the containers -struct TransparentStringHash -{ - using is_transparent = void; - size_t operator()(std::string_view txt) const - { - return std::hash{}(txt); - } -}; - -/* - * This functor is used to parse command line arguments and replace dashes with underscores, - * allowing options to be specified using either dashes or underscores. - */ -class OptionsAliasParser -{ -public: - explicit OptionsAliasParser(const boost::program_options::options_description& options) - { - options_names.reserve(options.options().size()); - for (const auto& option : options.options()) - options_names.insert(option->long_name()); - } - - /* - * Parses arguments by replacing dashes with underscores, and matches the resulting name with known options - * Implements boost::program_options::ext_parser logic - */ - std::pair operator()(const std::string & token) const - { - if (!token.starts_with("--")) - return {}; - std::string arg = token.substr(2); - - // divide token by '=' to separate key and value if options style=long_allow_adjacent - auto pos_eq = arg.find('='); - std::string key = arg.substr(0, pos_eq); - - if (options_names.contains(key)) - // option does not require any changes, because it is already correct - return {}; - - std::replace(key.begin(), key.end(), '-', '_'); - if (!options_names.contains(key)) - // after replacing '-' with '_' argument is still unknown - return {}; - - std::string value; - if (pos_eq != std::string::npos && pos_eq < arg.size()) - value = arg.substr(pos_eq + 1); - - return {key, value}; - } - -private: - std::unordered_set options_names; -}; - -} - -/// Enable optimizations even in debug builds because otherwise options parsing becomes extremely slow affecting .sh tests -#if defined(__clang__) -#pragma clang optimize on -#endif -void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) -{ - if (allow_repeated_settings) - addProgramOptionsAsMultitokens(cmd_settings, options_description.main_description.value()); - else - addProgramOptions(cmd_settings, options_description.main_description.value()); - - if (allow_merge_tree_settings) - { - /// Add merge tree settings manually, because names of some settings - /// may clash. Query settings have higher priority and we just - /// skip ambiguous merge tree settings. - auto & main_options = options_description.main_description.value(); - - std::unordered_set> main_option_names; - for (const auto & option : main_options.options()) - main_option_names.insert(option->long_name()); - - for (const auto & setting : cmd_merge_tree_settings.all()) - { - const auto add_setting = [&](const std::string_view name) - { - if (auto it = main_option_names.find(name); it != main_option_names.end()) - return; - - if (allow_repeated_settings) - addProgramOptionAsMultitoken(cmd_merge_tree_settings, main_options, name, setting); - else - addProgramOption(cmd_merge_tree_settings, main_options, name, setting); - }; - - const auto & setting_name = setting.getName(); - - add_setting(setting_name); - - const auto & settings_to_aliases = MergeTreeSettings::Traits::settingsToAliases(); - if (auto it = settings_to_aliases.find(setting_name); it != settings_to_aliases.end()) - { - for (const auto alias : it->second) - { - add_setting(alias); - } - } - } - } - - /// Parse main commandline options. - auto parser = po::command_line_parser(arguments) - .options(options_description.main_description.value()) - .extra_parser(OptionsAliasParser(options_description.main_description.value())) - .allow_unregistered(); - po::parsed_options parsed = parser.run(); - - /// Check unrecognized options without positional options. - auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional); - if (!unrecognized_options.empty()) - { - auto hints = this->getHints(unrecognized_options[0]); - if (!hints.empty()) - throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'. Maybe you meant {}", - unrecognized_options[0], toString(hints)); - - throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[0]); - } - - /// Check positional options. - for (const auto & op : parsed.options) - { - if (!op.unregistered && op.string_key.empty() && !op.original_tokens[0].starts_with("--") - && !op.original_tokens[0].empty() && !op.value.empty()) - { - /// Two special cases for better usability: - /// - if the option contains a whitespace, it might be a query: clickhouse "SELECT 1" - /// These are relevant for interactive usage - user-friendly, but questionable in general. - /// In case of ambiguity or for scripts, prefer using proper options. - - const auto & token = op.original_tokens[0]; - po::variable_value value(boost::any(op.value), false); - - const char * option; - if (token.contains(' ')) - option = "query"; - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token); - - if (!options.emplace(option, value).second) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token); - } - } - - po::store(parsed, options); -} - - void ClientBase::init(int argc, char ** argv) { namespace po = boost::program_options; @@ -3040,9 +2918,9 @@ void ClientBase::init(int argc, char ** argv) ("config-file,C", po::value(), "config-file path") - ("query,q", po::value>()->multitoken(), R"(query; can be specified multiple times (--query "SELECT 1" --query "SELECT 2"...))") + ("query,q", po::value>()->multitoken(), R"(Query. Can be specified multiple times (--query "SELECT 1" --query "SELECT 2") or once with multiple comma-separated queries (--query "SELECT 1; SELECT 2;"). In the latter case, INSERT queries with non-VALUE format must be separated by empty lines.)") ("queries-file", po::value>()->multitoken(), "file path with queries to execute; multiple files can be specified (--queries-file file1 file2...)") - ("multiquery,n", "If specified, multiple queries separated by semicolons can be listed after --query. For convenience, it is also possible to omit --query and pass the queries directly after --multiquery.") + ("multiquery,n", "Obsolete, does nothing") ("multiline,m", "If specified, allow multiline queries (do not send the query on Enter)") ("database,d", po::value(), "database") ("query_kind", po::value()->default_value("initial_query"), "One of initial_query/secondary_query/no_query") @@ -3071,7 +2949,7 @@ void ClientBase::init(int argc, char ** argv) ("vertical,E", "vertical output format, same as --format=Vertical or FORMAT Vertical or \\G at end of command") ("highlight", po::value()->default_value(true), "enable or disable basic syntax highlight in interactive command line") - ("ignore-error", "do not stop processing in multiquery mode") + ("ignore-error", "do not stop processing when an error occurs") ("stacktrace", "print stack traces of exceptions") ("hardware-utilization", "print hardware utilization information in progress bar") ("print-profile-events", po::value(&profile_events.print)->zero_tokens(), "Printing ProfileEvents packets") @@ -3164,8 +3042,6 @@ void ClientBase::init(int argc, char ** argv) queries_files = options["queries-file"].as>(); if (options.count("multiline")) getClientConfiguration().setBool("multiline", true); - if (options.count("multiquery")) - getClientConfiguration().setBool("multiquery", true); if (options.count("ignore-error")) getClientConfiguration().setBool("ignore-error", true); if (options.count("format")) diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 4f500a4c45d..7689744a373 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -17,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -156,8 +156,6 @@ protected: void setInsertionTable(const ASTInsertQuery & insert_query); - void addMultiquery(std::string_view query, Arguments & common_arguments) const; - private: void receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, bool partial_result_on_first_cancel); bool receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled_); @@ -206,6 +204,9 @@ protected: /// Adjust some settings after command line options and config had been processed. void adjustSettings(); + /// Initializes the client context. + void initClientContext(); + void setDefaultFormatsAndCompressionFromConfiguration(); void initTTYBuffer(ProgressOption progress); @@ -215,6 +216,9 @@ protected: SharedContextHolder shared_context; ContextMutablePtr global_context; + /// Client context is a context used only by the client to parse queries, process query parameters and to connect to clickhouse-server. + ContextMutablePtr client_context; + LoggerPtr fatal_log; Poco::AutoPtr fatal_channel_ptr; Poco::AutoPtr fatal_console_channel_ptr; @@ -223,7 +227,6 @@ protected: std::unique_ptr signal_listener; bool is_interactive = false; /// Use either interactive line editing interface or batch mode. - bool is_multiquery = false; bool delayed_interactive = false; bool echo_queries = false; /// Print queries before execution in batch mode. diff --git a/src/Client/ClientBaseOptimizedParts.cpp b/src/Client/ClientBaseOptimizedParts.cpp new file mode 100644 index 00000000000..421843a0e79 --- /dev/null +++ b/src/Client/ClientBaseOptimizedParts.cpp @@ -0,0 +1,176 @@ +#include +#include + +namespace DB +{ + +/** + * Program options parsing is very slow in debug builds and it affects .sh tests + * causing them to timeout sporadically. + * It seems impossible to enable optimizations for a single function (only to disable them), so + * instead we extract the code to a separate source file and compile it with different options. + */ +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int UNRECOGNIZED_ARGUMENTS; +} + +namespace +{ + +/// Define transparent hash to we can use +/// std::string_view with the containers +struct TransparentStringHash +{ + using is_transparent = void; + size_t operator()(std::string_view txt) const + { + return std::hash{}(txt); + } +}; + +/* + * This functor is used to parse command line arguments and replace dashes with underscores, + * allowing options to be specified using either dashes or underscores. + */ +class OptionsAliasParser +{ +public: + explicit OptionsAliasParser(const boost::program_options::options_description& options) + { + options_names.reserve(options.options().size()); + for (const auto& option : options.options()) + options_names.insert(option->long_name()); + } + + /* + * Parses arguments by replacing dashes with underscores, and matches the resulting name with known options + * Implements boost::program_options::ext_parser logic + */ + std::pair operator()(const std::string & token) const + { + if (!token.starts_with("--")) + return {}; + std::string arg = token.substr(2); + + // divide token by '=' to separate key and value if options style=long_allow_adjacent + auto pos_eq = arg.find('='); + std::string key = arg.substr(0, pos_eq); + + if (options_names.contains(key)) + // option does not require any changes, because it is already correct + return {}; + + std::replace(key.begin(), key.end(), '-', '_'); + if (!options_names.contains(key)) + // after replacing '-' with '_' argument is still unknown + return {}; + + std::string value; + if (pos_eq != std::string::npos && pos_eq < arg.size()) + value = arg.substr(pos_eq + 1); + + return {key, value}; + } + +private: + std::unordered_set options_names; +}; + +} + +void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) +{ + if (allow_repeated_settings) + addProgramOptionsAsMultitokens(cmd_settings, options_description.main_description.value()); + else + addProgramOptions(cmd_settings, options_description.main_description.value()); + + if (allow_merge_tree_settings) + { + /// Add merge tree settings manually, because names of some settings + /// may clash. Query settings have higher priority and we just + /// skip ambiguous merge tree settings. + auto & main_options = options_description.main_description.value(); + + std::unordered_set> main_option_names; + for (const auto & option : main_options.options()) + main_option_names.insert(option->long_name()); + + for (const auto & setting : cmd_merge_tree_settings.all()) + { + const auto add_setting = [&](const std::string_view name) + { + if (auto it = main_option_names.find(name); it != main_option_names.end()) + return; + + if (allow_repeated_settings) + addProgramOptionAsMultitoken(cmd_merge_tree_settings, main_options, name, setting); + else + addProgramOption(cmd_merge_tree_settings, main_options, name, setting); + }; + + const auto & setting_name = setting.getName(); + + add_setting(setting_name); + + const auto & settings_to_aliases = MergeTreeSettings::Traits::settingsToAliases(); + if (auto it = settings_to_aliases.find(setting_name); it != settings_to_aliases.end()) + { + for (const auto alias : it->second) + { + add_setting(alias); + } + } + } + } + + /// Parse main commandline options. + auto parser = po::command_line_parser(arguments) + .options(options_description.main_description.value()) + .extra_parser(OptionsAliasParser(options_description.main_description.value())) + .allow_unregistered(); + po::parsed_options parsed = parser.run(); + + /// Check unrecognized options without positional options. + auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional); + if (!unrecognized_options.empty()) + { + auto hints = this->getHints(unrecognized_options[0]); + if (!hints.empty()) + throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'. Maybe you meant {}", + unrecognized_options[0], toString(hints)); + + throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[0]); + } + + /// Check positional options. + for (const auto & op : parsed.options) + { + if (!op.unregistered && op.string_key.empty() && !op.original_tokens[0].starts_with("--") + && !op.original_tokens[0].empty() && !op.value.empty()) + { + /// Two special cases for better usability: + /// - if the option contains a whitespace, it might be a query: clickhouse "SELECT 1" + /// These are relevant for interactive usage - user-friendly, but questionable in general. + /// In case of ambiguity or for scripts, prefer using proper options. + + const auto & token = op.original_tokens[0]; + po::variable_value value(boost::any(op.value), false); + + const char * option; + if (token.contains(' ')) + option = "query"; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token); + + if (!options.emplace(option, value).second) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token); + } + } + + po::store(parsed, options); +} + +} diff --git a/src/Common/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp similarity index 97% rename from src/Common/QueryFuzzer.cpp rename to src/Client/QueryFuzzer.cpp index 161c38f20e0..f5b700ea529 100644 --- a/src/Common/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -68,21 +68,22 @@ Field QueryFuzzer::getRandomField(int type) { case 0: { - return bad_int64_values[fuzz_rand() % std::size(bad_int64_values)]; + return bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) + / sizeof(*bad_int64_values))]; } case 1: { static constexpr double values[] = {NAN, INFINITY, -INFINITY, 0., -0., 0.0001, 0.5, 0.9999, 1., 1.0001, 2., 10.0001, 100.0001, 1000.0001, 1e10, 1e20, - FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % std::size(values)]; + FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % (sizeof(values) / sizeof(*values))]; } case 2: { static constexpr UInt64 scales[] = {0, 1, 2, 10}; return DecimalField( - bad_int64_values[fuzz_rand() % std::size(bad_int64_values)], - static_cast(scales[fuzz_rand() % std::size(scales)]) + bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) / sizeof(*bad_int64_values))], + static_cast(scales[fuzz_rand() % (sizeof(scales) / sizeof(*scales))]) ); } default: @@ -164,8 +165,7 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - if (debug_stream) - *debug_stream << "erased\n"; + std::cerr << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -174,14 +174,12 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - if (debug_stream) - *debug_stream << fmt::format("inserted (pos {})\n", pos); + std::cerr << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - if (debug_stream) - *debug_stream << "inserted (0)\n"; + std::cerr << "inserted (0)\n"; } } @@ -199,9 +197,7 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - - if (debug_stream) - *debug_stream << "erased\n"; + std::cerr << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -210,16 +206,12 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - - if (debug_stream) - *debug_stream << fmt::format("inserted (pos {})\n", pos); + std::cerr << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - - if (debug_stream) - *debug_stream << "inserted (0)\n"; + std::cerr << "inserted (0)\n"; } } @@ -352,8 +344,7 @@ void QueryFuzzer::fuzzOrderByList(IAST * ast) } else { - if (debug_stream) - *debug_stream << "No random column.\n"; + std::cerr << "No random column.\n"; } } @@ -387,8 +378,7 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast) if (col) impl->children.insert(pos, col); else - if (debug_stream) - *debug_stream << "No random column.\n"; + std::cerr << "No random column.\n"; } // We don't have to recurse here to fuzz the children, this is handled by @@ -1371,15 +1361,11 @@ void QueryFuzzer::fuzzMain(ASTPtr & ast) collectFuzzInfoMain(ast); fuzz(ast); - if (out_stream) - { - *out_stream << std::endl; - - WriteBufferFromOStream ast_buf(*out_stream, 4096); - formatAST(*ast, ast_buf, false /*highlight*/); - ast_buf.finalize(); - *out_stream << std::endl << std::endl; - } + std::cout << std::endl; + WriteBufferFromOStream ast_buf(std::cout, 4096); + formatAST(*ast, ast_buf, false /*highlight*/); + ast_buf.finalize(); + std::cout << std::endl << std::endl; } } diff --git a/src/Common/QueryFuzzer.h b/src/Client/QueryFuzzer.h similarity index 91% rename from src/Common/QueryFuzzer.h rename to src/Client/QueryFuzzer.h index 35d088809f2..6165e589cae 100644 --- a/src/Common/QueryFuzzer.h +++ b/src/Client/QueryFuzzer.h @@ -35,31 +35,9 @@ struct ASTWindowDefinition; * queries, so you want to feed it a lot of queries to get some interesting mix * of them. Normally we feed SQL regression tests to it. */ -class QueryFuzzer +struct QueryFuzzer { -public: - explicit QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = nullptr, std::ostream * debug_stream_ = nullptr) - : fuzz_rand(fuzz_rand_) - , out_stream(out_stream_) - , debug_stream(debug_stream_) - { - } - - // This is the only function you have to call -- it will modify the passed - // ASTPtr to point to new AST with some random changes. - void fuzzMain(ASTPtr & ast); - - ASTs getInsertQueriesForFuzzedTables(const String & full_query); - ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query); - void notifyQueryFailed(ASTPtr ast); - - static bool isSuitableForFuzzing(const ASTCreateQuery & create); - -private: - pcg64 fuzz_rand; - - std::ostream * out_stream = nullptr; - std::ostream * debug_stream = nullptr; + pcg64 fuzz_rand{randomSeed()}; // We add elements to expression lists with fixed probability. Some elements // are so large, that the expected number of elements we add to them is @@ -88,6 +66,10 @@ private: std::unordered_map index_of_fuzzed_table; std::set created_tables_hashes; + // This is the only function you have to call -- it will modify the passed + // ASTPtr to point to new AST with some random changes. + void fuzzMain(ASTPtr & ast); + // Various helper functions follow, normally you shouldn't have to call them. Field getRandomField(int type); Field fuzzField(Field field); @@ -95,6 +77,9 @@ private: ASTPtr getRandomExpressionList(); DataTypePtr fuzzDataType(DataTypePtr type); DataTypePtr getRandomType(); + ASTs getInsertQueriesForFuzzedTables(const String & full_query); + ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query); + void notifyQueryFailed(ASTPtr ast); void replaceWithColumnLike(ASTPtr & ast); void replaceWithTableLike(ASTPtr & ast); void fuzzOrderByElement(ASTOrderByElement * elem); @@ -117,6 +102,8 @@ private: void addTableLike(ASTPtr ast); void addColumnLike(ASTPtr ast); void collectFuzzInfoRecurse(ASTPtr ast); + + static bool isSuitableForFuzzing(const ASTCreateQuery & create); }; } diff --git a/src/Client/TestHint.cpp b/src/Client/TestHint.cpp index b64882577ee..74c65009a73 100644 --- a/src/Client/TestHint.cpp +++ b/src/Client/TestHint.cpp @@ -10,6 +10,7 @@ namespace DB::ErrorCodes { extern const int CANNOT_PARSE_TEXT; + extern const int OK; } namespace DB @@ -62,9 +63,28 @@ bool TestHint::hasExpectedServerError(int error) return std::find(server_errors.begin(), server_errors.end(), error) != server_errors.end(); } +bool TestHint::needRetry(const std::unique_ptr & server_exception, size_t * retries_counter) +{ + chassert(retries_counter); + if (max_retries <= *retries_counter) + return false; + + ++*retries_counter; + + int error = ErrorCodes::OK; + if (server_exception) + error = server_exception->code(); + + + if (retry_until) + return !hasExpectedServerError(error); /// retry until we get the expected error + else + return hasExpectedServerError(error); /// retry while we have the expected error +} + void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint) { - std::unordered_set commands{"echo", "echoOn", "echoOff"}; + std::unordered_set commands{"echo", "echoOn", "echoOff", "retry"}; std::unordered_set command_errors{ "serverError", @@ -73,6 +93,9 @@ void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint) for (Token token = comment_lexer.nextToken(); !token.isEnd(); token = comment_lexer.nextToken()) { + if (token.type == TokenType::Whitespace) + continue; + String item = String(token.begin, token.end); if (token.type == TokenType::BareWord && commands.contains(item)) { @@ -82,6 +105,30 @@ void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint) echo.emplace(true); if (item == "echoOff") echo.emplace(false); + + if (item == "retry") + { + token = comment_lexer.nextToken(); + while (token.type == TokenType::Whitespace) + token = comment_lexer.nextToken(); + + if (token.type != TokenType::Number) + throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_TEXT, "Could not parse the number of retries: {}", + std::string_view(token.begin, token.end)); + + max_retries = std::stoul(std::string(token.begin, token.end)); + + token = comment_lexer.nextToken(); + while (token.type == TokenType::Whitespace) + token = comment_lexer.nextToken(); + + if (token.type != TokenType::BareWord || + (std::string_view(token.begin, token.end) != "until" && + std::string_view(token.begin, token.end) != "while")) + throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_TEXT, "Expected 'until' or 'while' after the number of retries, got: {}", + std::string_view(token.begin, token.end)); + retry_until = std::string_view(token.begin, token.end) == "until"; + } } else if (!is_leading_hint && token.type == TokenType::BareWord && command_errors.contains(item)) { @@ -133,6 +180,9 @@ void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint) break; } } + + if (max_retries && server_errors.size() != 1) + throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_TEXT, "Expected one serverError after the 'retry N while|until' command"); } } diff --git a/src/Client/TestHint.h b/src/Client/TestHint.h index b76c4245df4..bbe7873c08b 100644 --- a/src/Client/TestHint.h +++ b/src/Client/TestHint.h @@ -6,6 +6,7 @@ #include #include +#include namespace DB @@ -65,12 +66,17 @@ public: bool hasExpectedClientError(int error); bool hasExpectedServerError(int error); + bool needRetry(const std::unique_ptr & server_exception, size_t * retries_counter); + private: const String & query; ErrorVector server_errors{}; ErrorVector client_errors{}; std::optional echo; + size_t max_retries = 0; + bool retry_until = false; + void parse(Lexer & comment_lexer, bool is_leading_hint); bool allErrorsExpected(int actual_server_error, int actual_client_error) const diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index 33bd1266c90..e26fe790a8e 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -267,7 +267,7 @@ bool ColumnAggregateFunction::structureEquals(const IColumn & to) const } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start, size_t length) #else void ColumnAggregateFunction::doInsertRangeFrom(const IColumn & from, size_t start, size_t length) @@ -465,7 +465,7 @@ void ColumnAggregateFunction::insertFromWithOwnership(const IColumn & from, size insertMergeFrom(from, n); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n) #else void ColumnAggregateFunction::doInsertFrom(const IColumn & from, size_t n) diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index 330a707b75c..b581c3ba3b4 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -145,7 +145,7 @@ public: void insertData(const char * pos, size_t length) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & from, size_t n) override; #else using IColumn::insertFrom; @@ -189,7 +189,7 @@ public: void protect() override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & from, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & from, size_t start, size_t length) override; @@ -212,7 +212,7 @@ public: MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t, size_t, const IColumn &, int) const override #else int doCompareAt(size_t, size_t, const IColumn &, int) const override diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 9203fb8042f..19cce678cc7 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -336,7 +336,7 @@ bool ColumnArray::tryInsert(const Field & x) return true; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnArray::insertFrom(const IColumn & src_, size_t n) #else void ColumnArray::doInsertFrom(const IColumn & src_, size_t n) @@ -395,7 +395,7 @@ int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan : 1); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const #else int ColumnArray::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const @@ -542,7 +542,7 @@ void ColumnArray::getExtremes(Field & min, Field & max) const } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnArray::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 5e01b9144d7..63affb86d9d 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -84,14 +84,14 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; #endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src_, size_t n) override; #else void doInsertFrom(const IColumn & src_, size_t n) override; @@ -103,7 +103,7 @@ public: ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; template ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h index 19470113394..c4270e8216b 100644 --- a/src/Columns/ColumnCompressed.h +++ b/src/Columns/ColumnCompressed.h @@ -86,7 +86,7 @@ public: bool isDefaultAt(size_t) const override { throwMustBeDecompressed(); } void insert(const Field &) override { throwMustBeDecompressed(); } bool tryInsert(const Field &) override { throwMustBeDecompressed(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); } #else void doInsertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); } @@ -105,7 +105,7 @@ public: void expand(const Filter &, bool) override { throwMustBeDecompressed(); } ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeDecompressed(); } ColumnPtr index(const IColumn &, size_t) const override { throwMustBeDecompressed(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); } #else int doCompareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); } diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index 65ce53687b9..ca38e76ea57 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -123,7 +123,7 @@ public: return data->isNullAt(0); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override #else void doInsertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override @@ -151,7 +151,7 @@ public: ++s; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn &, size_t) override #else void doInsertFrom(const IColumn &, size_t) override @@ -160,7 +160,7 @@ public: ++s; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; } #else void doInsertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; } @@ -237,7 +237,7 @@ public: return data->allocatedBytes() + sizeof(s); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override #else int doCompareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index e27807950ae..bb4433f8956 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -31,7 +31,7 @@ namespace ErrorCodes } template -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnDecimal::compareAt(size_t n, size_t m, const IColumn & rhs_, int) const #else int ColumnDecimal::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int) const @@ -333,7 +333,7 @@ void ColumnDecimal::insertData(const char * src, size_t /*length*/) } template -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnDecimal::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnDecimal::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index eb8a305a822..59bfbd2159c 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -55,13 +55,13 @@ public: void reserve(size_t n) override { data.reserve_exact(n); } void shrinkToFit() override { data.shrink_to_fit(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } #else void doInsertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } #endif -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertManyFrom(const IColumn & src, size_t position, size_t length) override #else void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override @@ -76,7 +76,7 @@ public: void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); } void insert(const Field & x) override { data.push_back(x.get()); } bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; @@ -104,7 +104,7 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index c735238f515..a92d54dd675 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -215,7 +215,7 @@ bool ColumnDynamic::tryInsert(const DB::Field & x) } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n) #else void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n) @@ -269,7 +269,7 @@ void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n) variant_col.insertIntoVariantFrom(string_variant_discr, *tmp_string_column, 0); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) #else void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) @@ -439,7 +439,7 @@ void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, si } } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnDynamic::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) #else void ColumnDynamic::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length) @@ -603,7 +603,7 @@ void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const variant_col.getVariantByGlobalDiscriminator(discr).updateHashWithValue(variant_col.offsetAt(n), hash); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnDynamic::compareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const #else int ColumnDynamic::doCompareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 6f09abb945a..e92cabd3db9 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -144,7 +144,7 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src_, size_t n) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; @@ -221,7 +221,7 @@ public: return scattered_columns; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index 4d17eb0bebd..0bb3f7edb14 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -74,7 +74,7 @@ bool ColumnFixedString::tryInsert(const Field & x) return true; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnFixedString::insertFrom(const IColumn & src_, size_t index) #else void ColumnFixedString::doInsertFrom(const IColumn & src_, size_t index) @@ -90,7 +90,7 @@ void ColumnFixedString::doInsertFrom(const IColumn & src_, size_t index) memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[n * index], n); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnFixedString::insertManyFrom(const IColumn & src, size_t position, size_t length) #else void ColumnFixedString::doInsertManyFrom(const IColumn & src, size_t position, size_t length) @@ -225,7 +225,7 @@ size_t ColumnFixedString::estimateCardinalityInPermutedRange(const Permutation & return elements.size(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnFixedString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 630c6c1c0a6..676ac7712ba 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -98,13 +98,13 @@ public: bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src_, size_t index) override; #else void doInsertFrom(const IColumn & src_, size_t index) override; #endif -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertManyFrom(const IColumn & src, size_t position, size_t length) override; #else void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; @@ -137,7 +137,7 @@ public: void updateHashFast(SipHash & hash) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override #else int doCompareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override @@ -156,7 +156,7 @@ public: size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; diff --git a/src/Columns/ColumnFunction.cpp b/src/Columns/ColumnFunction.cpp index fa57f35a823..fc81efaac0c 100644 --- a/src/Columns/ColumnFunction.cpp +++ b/src/Columns/ColumnFunction.cpp @@ -72,7 +72,7 @@ ColumnPtr ColumnFunction::cut(size_t start, size_t length) const return ColumnFunction::create(length, function, capture, is_short_circuit_argument, is_function_compiled); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnFunction::insertFrom(const IColumn & src, size_t n) #else void ColumnFunction::doInsertFrom(const IColumn & src, size_t n) @@ -93,7 +93,7 @@ void ColumnFunction::doInsertFrom(const IColumn & src, size_t n) ++elements_size; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnFunction::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnFunction::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index dcd67aecad7..b62c6bf70eb 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -95,12 +95,12 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert into {}", getName()); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override; #else void doInsertFrom(const IColumn & src, size_t n) override; #endif -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn &, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn &, size_t start, size_t length) override; @@ -146,7 +146,7 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "popBack is not implemented for {}", getName()); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t, size_t, const IColumn &, int) const override #else int doCompareAt(size_t, size_t, const IColumn &, int) const override diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 49ce948bf78..a977046b07f 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -158,7 +158,7 @@ void ColumnLowCardinality::insertDefault() idx.insertPosition(getDictionary().getDefaultValueIndex()); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n) #else void ColumnLowCardinality::doInsertFrom(const IColumn & src, size_t n) @@ -190,7 +190,7 @@ void ColumnLowCardinality::insertFromFullColumn(const IColumn & src, size_t n) idx.insertPosition(getDictionary().uniqueInsertFrom(src, n)); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnLowCardinality::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -362,7 +362,7 @@ int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs, return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const #else int ColumnLowCardinality::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index fb0c1237fcf..3766b247d60 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -78,14 +78,14 @@ public: bool tryInsert(const Field & x) override; void insertDefault() override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override; #else void doInsertFrom(const IColumn & src, size_t n) override; #endif void insertFromFullColumn(const IColumn & src, size_t n); -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; @@ -135,7 +135,7 @@ public: return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit)); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 08d7734ac6b..1025b4e77b9 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -153,7 +153,7 @@ void ColumnMap::updateHashFast(SipHash & hash) const nested->updateHashFast(hash); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnMap::insertFrom(const IColumn & src, size_t n) #else void ColumnMap::doInsertFrom(const IColumn & src, size_t n) @@ -162,7 +162,7 @@ void ColumnMap::doInsertFrom(const IColumn & src, size_t n) nested->insertFrom(assert_cast(src).getNestedColumn(), n); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnMap::insertManyFrom(const IColumn & src, size_t position, size_t length) #else void ColumnMap::doInsertManyFrom(const IColumn & src, size_t position, size_t length) @@ -171,7 +171,7 @@ void ColumnMap::doInsertManyFrom(const IColumn & src, size_t position, size_t le assert_cast(*nested).insertManyFrom(assert_cast(src).getNestedColumn(), position, length); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnMap::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -222,7 +222,7 @@ MutableColumns ColumnMap::scatter(ColumnIndex num_columns, const Selector & sele return res; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnMap::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const #else int ColumnMap::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index 29275e1b5f7..3eaaa0ad562 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -67,7 +67,7 @@ public: WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src_, size_t n) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; @@ -83,7 +83,7 @@ public: ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr replicate(const Offsets & offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 867c9149242..6529f0b78db 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -217,7 +217,7 @@ const char * ColumnNullable::skipSerializedInArena(const char * pos) const return pos; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnNullable::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnNullable::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -258,7 +258,7 @@ bool ColumnNullable::tryInsert(const Field & x) return true; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnNullable::insertFrom(const IColumn & src, size_t n) #else void ColumnNullable::doInsertFrom(const IColumn & src, size_t n) @@ -270,7 +270,7 @@ void ColumnNullable::doInsertFrom(const IColumn & src, size_t n) } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnNullable::insertManyFrom(const IColumn & src, size_t position, size_t length) #else void ColumnNullable::doInsertManyFrom(const IColumn & src, size_t position, size_t length) @@ -410,7 +410,7 @@ int ColumnNullable::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const #else int ColumnNullable::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 15bbd8c3b57..fe9f5b6dcc2 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -69,7 +69,7 @@ public: char * serializeValueIntoMemory(size_t n, char * memory) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; @@ -77,7 +77,7 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; #else @@ -100,7 +100,7 @@ public: void expand(const Filter & mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index 9c9dade3dd8..a6431007cb6 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -763,7 +763,7 @@ void ColumnObject::get(size_t n, Field & res) const } } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnObject::insertFrom(const IColumn & src, size_t n) #else void ColumnObject::doInsertFrom(const IColumn & src, size_t n) @@ -772,7 +772,7 @@ void ColumnObject::doInsertFrom(const IColumn & src, size_t n) insert(src[n]); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnObject::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index 5303adf0881..25cfaa550f6 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -211,7 +211,7 @@ public: bool tryInsert(const Field & field) override; void insertDefault() override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else @@ -236,7 +236,7 @@ public: /// Order of rows in ColumnObject is undefined. void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override; void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {} -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } #else int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 8f98a4433d3..a908d970a15 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -174,7 +174,7 @@ const char * ColumnSparse::skipSerializedInArena(const char * pos) const return values->skipSerializedInArena(pos); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnSparse::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnSparse::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -252,7 +252,7 @@ bool ColumnSparse::tryInsert(const Field & x) return true; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnSparse::insertFrom(const IColumn & src, size_t n) #else void ColumnSparse::doInsertFrom(const IColumn & src, size_t n) @@ -454,7 +454,7 @@ ColumnPtr ColumnSparse::indexImpl(const PaddedPODArray & indexes, size_t l return ColumnSparse::create(std::move(res_values), std::move(res_offsets), limit); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnSparse::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const #else int ColumnSparse::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h index 392a6910956..7a4d914e62a 100644 --- a/src/Columns/ColumnSparse.h +++ b/src/Columns/ColumnSparse.h @@ -81,14 +81,14 @@ public: char * serializeValueIntoMemory(size_t n, char * memory) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char *) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; #endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override; #else void doInsertFrom(const IColumn & src, size_t n) override; @@ -106,7 +106,7 @@ public: template ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 4accfbe8f41..7cfa2571f5a 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -39,7 +39,7 @@ ColumnString::ColumnString(const ColumnString & src) last_offset, chars.size()); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnString::insertManyFrom(const IColumn & src, size_t position, size_t length) #else void ColumnString::doInsertManyFrom(const IColumn & src, size_t position, size_t length) @@ -132,7 +132,7 @@ WeakHash32 ColumnString::getWeakHash32() const } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index faaaa8848ca..c1012e1e55e 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -142,7 +142,7 @@ public: return true; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src_, size_t n) override #else void doInsertFrom(const IColumn & src_, size_t n) override @@ -169,7 +169,7 @@ public: } } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertManyFrom(const IColumn & src, size_t position, size_t length) override; #else void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; @@ -220,7 +220,7 @@ public: hash.update(reinterpret_cast(chars.data()), chars.size() * sizeof(chars[0])); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; @@ -250,7 +250,7 @@ public: offsets.push_back(offsets.back() + 1); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override #else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 6a0d85cba69..4fc3f88a87c 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -206,7 +206,7 @@ bool ColumnTuple::tryInsert(const Field & x) return true; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnTuple::insertFrom(const IColumn & src_, size_t n) #else void ColumnTuple::doInsertFrom(const IColumn & src_, size_t n) @@ -223,7 +223,7 @@ void ColumnTuple::doInsertFrom(const IColumn & src_, size_t n) columns[i]->insertFrom(*src.columns[i], n); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnTuple::insertManyFrom(const IColumn & src, size_t position, size_t length) #else void ColumnTuple::doInsertManyFrom(const IColumn & src, size_t position, size_t length) @@ -327,7 +327,7 @@ void ColumnTuple::updateHashFast(SipHash & hash) const column->updateHashFast(hash); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnTuple::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -483,7 +483,7 @@ int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_ return 0; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const #else int ColumnTuple::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 2fafd93f776..16b47a993f6 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -66,7 +66,7 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src_, size_t n) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; #else @@ -83,7 +83,7 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; @@ -94,7 +94,7 @@ public: ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr replicate(const Offsets & offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index ec1f8e0a4d5..d6cb75679be 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -90,7 +90,7 @@ public: return getNestedColumn()->updateHashWithValue(n, hash_func); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; @@ -492,7 +492,7 @@ const char * ColumnUnique::skipSerializedInArena(const char *) const } template -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnUnique::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const #else int ColumnUnique::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index 8fd6e1bbac1..de7efb41d19 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -595,7 +595,7 @@ void ColumnVariant::insertManyFromImpl(const DB::IColumn & src_, size_t position } } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnVariant::insertFrom(const IColumn & src_, size_t n) #else void ColumnVariant::doInsertFrom(const IColumn & src_, size_t n) @@ -604,7 +604,7 @@ void ColumnVariant::doInsertFrom(const IColumn & src_, size_t n) insertFromImpl(src_, n, nullptr); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length) #else void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t length) @@ -613,7 +613,7 @@ void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t insertRangeFromImpl(src_, start, length, nullptr); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) #else void ColumnVariant::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length) @@ -1175,7 +1175,7 @@ bool ColumnVariant::hasEqualValues() const return local_discriminators->hasEqualValues() && variants[localDiscriminatorAt(0)]->hasEqualValues(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnVariant::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const #else int ColumnVariant::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index 94f3066e676..34c24b5428d 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -180,7 +180,7 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src_, size_t n) override; void insertRangeFrom(const IColumn & src_, size_t start, size_t length) override; void insertManyFrom(const IColumn & src_, size_t position, size_t length) override; @@ -223,7 +223,7 @@ public: ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; ColumnPtr replicate(const Offsets & replicate_offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 185a1e0f615..c474efe35bd 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -502,7 +502,7 @@ bool ColumnVector::tryInsert(const DB::Field & x) } template -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnVector::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnVector::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index c01778ecf32..2fe5b635bd2 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -64,7 +64,7 @@ public: return data.size(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override #else void doInsertFrom(const IColumn & src, size_t n) override @@ -73,7 +73,7 @@ public: data.push_back(assert_cast(src).getData()[n]); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertManyFrom(const IColumn & src, size_t position, size_t length) override #else void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override @@ -150,7 +150,7 @@ public: } /// This method implemented in header because it could be possibly devirtualized. -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override #else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override @@ -240,7 +240,7 @@ public: bool tryInsert(const DB::Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; diff --git a/src/Columns/IColumn.cpp b/src/Columns/IColumn.cpp index 552e52cf51c..a189903b617 100644 --- a/src/Columns/IColumn.cpp +++ b/src/Columns/IColumn.cpp @@ -46,7 +46,7 @@ String IColumn::dumpStructure() const return res.str(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void IColumn::insertFrom(const IColumn & src, size_t n) #else void IColumn::doInsertFrom(const IColumn & src, size_t n) diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 3798d3b7466..f9c1a3e7034 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -179,7 +179,7 @@ public: /// Appends n-th element from other column with the same type. /// Is used in merge-sort and merges. It could be implemented in inherited classes more optimally than default implementation. -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) virtual void insertFrom(const IColumn & src, size_t n); #else void insertFrom(const IColumn & src, size_t n) @@ -191,7 +191,7 @@ public: /// Appends range of elements from other column with the same type. /// Could be used to concatenate columns. -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) virtual void insertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; #else void insertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -202,7 +202,7 @@ public: #endif /// Appends one element from other column with the same type multiple times. -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) virtual void insertManyFrom(const IColumn & src, size_t position, size_t length) { for (size_t i = 0; i < length; ++i) @@ -345,7 +345,7 @@ public: * * For non Nullable and non floating point types, nan_direction_hint is ignored. */ -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) [[nodiscard]] virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; #else [[nodiscard]] int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const @@ -667,7 +667,7 @@ protected: Sort full_sort, PartialSort partial_sort) const; -#if defined(ABORT_ON_LOGICAL_ERROR) +#if defined(DEBUG_OR_SANITIZER_BUILD) virtual void doInsertFrom(const IColumn & src, size_t n); virtual void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index b18f4fdb302..40d410e207d 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -27,7 +27,7 @@ public: size_t byteSize() const override { return 0; } size_t byteSizeAt(size_t) const override { return 0; } size_t allocatedBytes() const override { return 0; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } #else int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } @@ -73,7 +73,7 @@ public: { } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn &, size_t) override #else void doInsertFrom(const IColumn &, size_t) override @@ -82,7 +82,7 @@ public: ++s; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override #else void doInsertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override diff --git a/src/Columns/IColumnUnique.h b/src/Columns/IColumnUnique.h index 1b86204f5b1..a8e10e5e2b2 100644 --- a/src/Columns/IColumnUnique.h +++ b/src/Columns/IColumnUnique.h @@ -86,7 +86,7 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryInsert is not supported for ColumnUnique."); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn &, size_t, size_t) override #else void doInsertRangeFrom(const IColumn &, size_t, size_t) override diff --git a/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp b/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp index 645f6ed79f3..240099f0ae5 100644 --- a/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp +++ b/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp @@ -52,7 +52,7 @@ static ColumnPtr mockColumn(const DataTypePtr & type, size_t rows) } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) static NO_INLINE void insertManyFrom(IColumn & dst, const IColumn & src) #else static NO_INLINE void doInsertManyFrom(IColumn & dst, const IColumn & src) diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index 02bde0d80b7..ef8bdfc1823 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -45,26 +46,33 @@ namespace /// kernel 5 /// rss 15 /// [...] -uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key) +using Metrics = std::map; + +Metrics readAllMetricsFromStatFile(ReadBufferFromFile & buf) { + Metrics metrics; while (!buf.eof()) { std::string current_key; readStringUntilWhitespace(current_key, buf); - if (current_key != key) - { - std::string dummy; - readStringUntilNewlineInto(dummy, buf); - buf.ignore(); - continue; - } assertChar(' ', buf); + uint64_t value = 0; readIntText(value, buf); - return value; - } + assertChar('\n', buf); + auto [_, inserted] = metrics.emplace(std::move(current_key), value); + chassert(inserted, "Duplicate keys in stat file"); + } + return metrics; +} + +uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key) +{ + const auto all_metrics = readAllMetricsFromStatFile(buf); + if (const auto it = all_metrics.find(key); it != all_metrics.end()) + return it->second; throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find '{}' in '{}'", key, buf.getFileName()); } @@ -79,6 +87,13 @@ struct CgroupsV1Reader : ICgroupsReader return readMetricFromStatFile(buf, "rss"); } + std::string dumpAllStats() override + { + std::lock_guard lock(mutex); + buf.rewind(); + return fmt::format("{}", readAllMetricsFromStatFile(buf)); + } + private: std::mutex mutex; ReadBufferFromFile buf TSA_GUARDED_BY(mutex); @@ -106,6 +121,13 @@ struct CgroupsV2Reader : ICgroupsReader return mem_usage; } + std::string dumpAllStats() override + { + std::lock_guard lock(mutex); + stat_buf.rewind(); + return fmt::format("{}", readAllMetricsFromStatFile(stat_buf)); + } + private: std::mutex mutex; ReadBufferFromFile current_buf TSA_GUARDED_BY(mutex); @@ -178,10 +200,7 @@ CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait { const auto [cgroup_path, version] = getCgroupsPath(); - if (version == CgroupsVersion::V2) - cgroup_reader = std::make_unique(cgroup_path); - else - cgroup_reader = std::make_unique(cgroup_path); + cgroup_reader = createCgroupsReader(version, cgroup_path); LOG_INFO( log, @@ -234,7 +253,12 @@ void CgroupsMemoryUsageObserver::setMemoryUsageLimits(uint64_t hard_limit_, uint # endif /// Reset current usage in memory tracker. Expect zero for free_memory_in_allocator_arenas as we just purged them. uint64_t memory_usage = cgroup_reader->readMemoryUsage(); - LOG_TRACE(log, "Read current memory usage {} bytes ({}) from cgroups", memory_usage, ReadableSize(memory_usage)); + LOG_TRACE( + log, + "Read current memory usage {} bytes ({}) from cgroups, full available stats: {}", + memory_usage, + ReadableSize(memory_usage), + cgroup_reader->dumpAllStats()); MemoryTracker::setRSS(memory_usage, 0); LOG_INFO(log, "Purged jemalloc arenas. Current memory usage is {}", ReadableSize(memory_usage)); @@ -338,6 +362,13 @@ void CgroupsMemoryUsageObserver::runThread() } } +std::unique_ptr createCgroupsReader(CgroupsMemoryUsageObserver::CgroupsVersion version, const fs::path & cgroup_path) +{ + if (version == CgroupsMemoryUsageObserver::CgroupsVersion::V2) + return std::make_unique(cgroup_path); + else + return std::make_unique(cgroup_path); +} } #endif diff --git a/src/Common/CgroupsMemoryUsageObserver.h b/src/Common/CgroupsMemoryUsageObserver.h index b848a2bff3c..7f888fe631b 100644 --- a/src/Common/CgroupsMemoryUsageObserver.h +++ b/src/Common/CgroupsMemoryUsageObserver.h @@ -14,6 +14,8 @@ struct ICgroupsReader virtual ~ICgroupsReader() = default; virtual uint64_t readMemoryUsage() = 0; + + virtual std::string dumpAllStats() = 0; }; /// Does two things: @@ -81,6 +83,9 @@ private: bool quit = false; }; +std::unique_ptr +createCgroupsReader(CgroupsMemoryUsageObserver::CgroupsVersion version, const std::filesystem::path & cgroup_path); + #else class CgroupsMemoryUsageObserver { diff --git a/src/Common/Config/AbstractConfigurationComparison.cpp b/src/Common/Config/AbstractConfigurationComparison.cpp index 607b583cf31..80c837ed43b 100644 --- a/src/Common/Config/AbstractConfigurationComparison.cpp +++ b/src/Common/Config/AbstractConfigurationComparison.cpp @@ -38,7 +38,7 @@ namespace std::erase_if(left_subkeys, [&](const String & key) { return ignore_keys->contains(key); }); std::erase_if(right_subkeys, [&](const String & key) { return ignore_keys->contains(key); }); -#if defined(ABORT_ON_LOGICAL_ERROR) +#if defined(DEBUG_OR_SANITIZER_BUILD) /// Compound `ignore_keys` are not yet implemented. for (const auto & ignore_key : *ignore_keys) chassert(ignore_key.find('.') == std::string_view::npos); diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 7c97e73f278..1011ab12d15 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -234,10 +234,10 @@ M(PartsCommitted, "Deprecated. See PartsActive.") \ M(PartsPreActive, "The part is in data_parts, but not used for SELECTs.") \ M(PartsActive, "Active data part, used by current and upcoming SELECTs.") \ - M(AttachedDatabase, "Active database, used by current and upcoming SELECTs.") \ - M(AttachedTable, "Active table, used by current and upcoming SELECTs.") \ - M(AttachedView, "Active view, used by current and upcoming SELECTs.") \ - M(AttachedDictionary, "Active dictionary, used by current and upcoming SELECTs.") \ + M(AttachedDatabase, "Active databases.") \ + M(AttachedTable, "Active tables.") \ + M(AttachedView, "Active views.") \ + M(AttachedDictionary, "Active dictionaries.") \ M(PartsOutdated, "Not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes.") \ M(PartsDeleting, "Not active data part with identity refcounter, it is deleting right now by a cleaner.") \ M(PartsDeleteOnDestroy, "Part was moved to another disk and should be deleted in own destructor.") \ diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 111280074dd..d68537513da 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -64,7 +65,7 @@ void handle_error_code(const std::string & msg, int code, bool remote, const Exc { // In debug builds and builds with sanitizers, treat LOGICAL_ERROR as an assertion failure. // Log the message before we fail. -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD if (code == ErrorCodes::LOGICAL_ERROR) { abortOnFailedAssertion(msg, trace.data(), 0, trace.size()); @@ -100,7 +101,7 @@ Exception::Exception(const MessageMasked & msg_masked, int code, bool remote_) { if (terminate_on_any_exception) std::_Exit(terminate_status_code); - capture_thread_frame_pointers = thread_frame_pointers; + capture_thread_frame_pointers = getThreadFramePointers(); handle_error_code(msg_masked.msg, code, remote, getStackFramePointers()); } @@ -110,7 +111,7 @@ Exception::Exception(MessageMasked && msg_masked, int code, bool remote_) { if (terminate_on_any_exception) std::_Exit(terminate_status_code); - capture_thread_frame_pointers = thread_frame_pointers; + capture_thread_frame_pointers = getThreadFramePointers(); handle_error_code(message(), code, remote, getStackFramePointers()); } @@ -119,7 +120,7 @@ Exception::Exception(CreateFromPocoTag, const Poco::Exception & exc) { if (terminate_on_any_exception) std::_Exit(terminate_status_code); - capture_thread_frame_pointers = thread_frame_pointers; + capture_thread_frame_pointers = getThreadFramePointers(); #ifdef STD_EXCEPTION_HAS_STACK_TRACE auto * stack_trace_frames = exc.get_stack_trace_frames(); auto stack_trace_size = exc.get_stack_trace_size(); @@ -133,7 +134,7 @@ Exception::Exception(CreateFromSTDTag, const std::exception & exc) { if (terminate_on_any_exception) std::_Exit(terminate_status_code); - capture_thread_frame_pointers = thread_frame_pointers; + capture_thread_frame_pointers = getThreadFramePointers(); #ifdef STD_EXCEPTION_HAS_STACK_TRACE auto * stack_trace_frames = exc.get_stack_trace_frames(); auto stack_trace_size = exc.get_stack_trace_size(); @@ -223,10 +224,38 @@ Exception::FramePointers Exception::getStackFramePointers() const } thread_local bool Exception::enable_job_stack_trace = false; -thread_local std::vector Exception::thread_frame_pointers = {}; +thread_local bool Exception::can_use_thread_frame_pointers = false; +thread_local Exception::ThreadFramePointers Exception::thread_frame_pointers; + +Exception::ThreadFramePointers::ThreadFramePointers() +{ + can_use_thread_frame_pointers = true; +} + +Exception::ThreadFramePointers::~ThreadFramePointers() +{ + can_use_thread_frame_pointers = false; +} + +Exception::ThreadFramePointersBase Exception::getThreadFramePointers() +{ + if (can_use_thread_frame_pointers) + return thread_frame_pointers.frame_pointers; + + return {}; +} + +void Exception::setThreadFramePointers(ThreadFramePointersBase frame_pointers) +{ + if (can_use_thread_frame_pointers) + thread_frame_pointers.frame_pointers = std::move(frame_pointers); +} static void tryLogCurrentExceptionImpl(Poco::Logger * logger, const std::string & start_of_message) { + if (!isLoggingEnabled()) + return; + try { PreformattedMessage message = getCurrentExceptionMessageAndPattern(true); @@ -242,6 +271,9 @@ static void tryLogCurrentExceptionImpl(Poco::Logger * logger, const std::string void tryLogCurrentException(const char * log_name, const std::string & start_of_message) { + if (!isLoggingEnabled()) + return; + /// Under high memory pressure, new allocations throw a /// MEMORY_LIMIT_EXCEEDED exception. /// @@ -443,7 +475,7 @@ PreformattedMessage getCurrentExceptionMessageAndPattern(bool with_stacktrace, b } catch (...) {} // NOLINT(bugprone-empty-catch) -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD try { throw; diff --git a/src/Common/Exception.h b/src/Common/Exception.h index 4e54c411bf1..a4f55f41caa 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -10,7 +10,6 @@ #include #include -#include #include #include @@ -49,14 +48,14 @@ public: { if (terminate_on_any_exception) std::terminate(); - capture_thread_frame_pointers = thread_frame_pointers; + capture_thread_frame_pointers = getThreadFramePointers(); } Exception(const PreformattedMessage & msg, int code): Exception(msg.text, code) { if (terminate_on_any_exception) std::terminate(); - capture_thread_frame_pointers = thread_frame_pointers; + capture_thread_frame_pointers = getThreadFramePointers(); message_format_string = msg.format_string; message_format_string_args = msg.format_string_args; } @@ -65,18 +64,36 @@ public: { if (terminate_on_any_exception) std::terminate(); - capture_thread_frame_pointers = thread_frame_pointers; + capture_thread_frame_pointers = getThreadFramePointers(); message_format_string = msg.format_string; message_format_string_args = msg.format_string_args; } /// Collect call stacks of all previous jobs' schedulings leading to this thread job's execution static thread_local bool enable_job_stack_trace; - static thread_local std::vector thread_frame_pointers; + static thread_local bool can_use_thread_frame_pointers; + /// Because of unknown order of static destructor calls, + /// thread_frame_pointers can already be uninitialized when a different destructor generates an exception. + /// To prevent such scenarios, a wrapper class is created and a function that will return empty vector + /// if its destructor is already called + using ThreadFramePointersBase = std::vector; + struct ThreadFramePointers + { + ThreadFramePointers(); + ~ThreadFramePointers(); + + ThreadFramePointersBase frame_pointers; + }; + + static ThreadFramePointersBase getThreadFramePointers(); + static void setThreadFramePointers(ThreadFramePointersBase frame_pointers); + /// Callback for any exception static std::function callback; protected: + static thread_local ThreadFramePointers thread_frame_pointers; + // used to remove the sensitive information from exceptions if query_masking_rules is configured struct MessageMasked { @@ -178,7 +195,7 @@ class ErrnoException : public Exception public: ErrnoException(std::string && msg, int code, int with_errno) : Exception(msg, code), saved_errno(with_errno) { - capture_thread_frame_pointers = thread_frame_pointers; + capture_thread_frame_pointers = getThreadFramePointers(); addMessage(", {}", errnoToString(saved_errno)); } @@ -187,7 +204,7 @@ public: requires std::is_convertible_v ErrnoException(int code, T && message) : Exception(message, code), saved_errno(errno) { - capture_thread_frame_pointers = thread_frame_pointers; + capture_thread_frame_pointers = getThreadFramePointers(); addMessage(", {}", errnoToString(saved_errno)); } diff --git a/src/Common/FailPoint.cpp b/src/Common/FailPoint.cpp index 5454cba8e2e..f5ec8cf0356 100644 --- a/src/Common/FailPoint.cpp +++ b/src/Common/FailPoint.cpp @@ -57,7 +57,9 @@ static struct InitFiu PAUSEABLE_ONCE(finish_clean_quorum_failed_parts) \ PAUSEABLE(dummy_pausable_failpoint) \ ONCE(execute_query_calling_empty_set_result_func_on_exception) \ - ONCE(receive_timeout_on_table_status_response) + ONCE(receive_timeout_on_table_status_response) \ + REGULAR(keepermap_fail_drop_data) \ + REGULAR(lazy_pipe_fds_fail_close) \ namespace FailPoints diff --git a/src/Common/Logger.cpp b/src/Common/Logger.cpp index c8d557bc3a3..bd848abe353 100644 --- a/src/Common/Logger.cpp +++ b/src/Common/Logger.cpp @@ -25,3 +25,15 @@ bool hasLogger(const std::string & name) { return Poco::Logger::has(name); } + +static constinit std::atomic allow_logging{true}; + +bool isLoggingEnabled() +{ + return allow_logging; +} + +void disableLogging() +{ + allow_logging = false; +} diff --git a/src/Common/Logger.h b/src/Common/Logger.h index b54ccd33e72..7471e3dff9b 100644 --- a/src/Common/Logger.h +++ b/src/Common/Logger.h @@ -64,3 +64,7 @@ LoggerRawPtr createRawLogger(const std::string & name, Poco::Channel * channel, * Otherwise, returns false. */ bool hasLogger(const std::string & name); + +void disableLogging(); + +bool isLoggingEnabled(); diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 28cfa98666a..ac412684cf7 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -192,7 +192,7 @@ void MemoryTracker::debugLogBigAllocationWithoutCheck(Int64 size [[maybe_unused] { /// Big allocations through allocNoThrow (without checking memory limits) may easily lead to OOM (and it's hard to debug). /// Let's find them. -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD if (size < 0) return; diff --git a/src/Common/PageCache.cpp b/src/Common/PageCache.cpp index 56bd8c1a339..d719a387e14 100644 --- a/src/Common/PageCache.cpp +++ b/src/Common/PageCache.cpp @@ -424,7 +424,7 @@ static void logUnexpectedSyscallError(std::string name) { std::string message = fmt::format("{} failed: {}", name, errnoToString()); LOG_WARNING(&Poco::Logger::get("PageCache"), "{}", message); -#if defined(ABORT_ON_LOGICAL_ERROR) +#if defined(DEBUG_OR_SANITIZER_BUILD) volatile bool true_ = true; if (true_) // suppress warning about missing [[noreturn]] abortOnFailedAssertion(message); diff --git a/src/Common/PipeFDs.cpp b/src/Common/PipeFDs.cpp index ceadbb2f983..50eeda1bbe2 100644 --- a/src/Common/PipeFDs.cpp +++ b/src/Common/PipeFDs.cpp @@ -1,19 +1,23 @@ #include #include #include +#include #include #include #include #include -#include #include - namespace DB { +namespace FailPoints +{ + extern const char lazy_pipe_fds_fail_close[]; +} + namespace ErrorCodes { extern const int CANNOT_PIPE; @@ -42,6 +46,11 @@ void LazyPipeFDs::open() void LazyPipeFDs::close() { + fiu_do_on(FailPoints::lazy_pipe_fds_fail_close, + { + throw Exception(ErrorCodes::CANNOT_PIPE, "Manually triggered exception on close"); + }); + for (int & fd : fds_rw) { if (fd < 0) diff --git a/src/Common/PipeFDs.h b/src/Common/PipeFDs.h index 20bd847c077..b651176ee26 100644 --- a/src/Common/PipeFDs.h +++ b/src/Common/PipeFDs.h @@ -1,8 +1,5 @@ #pragma once -#include - - namespace DB { diff --git a/src/Common/SharedMutex.cpp b/src/Common/SharedMutex.cpp index 1df09ca998a..2d63f8542b0 100644 --- a/src/Common/SharedMutex.cpp +++ b/src/Common/SharedMutex.cpp @@ -1,4 +1,5 @@ #include +#include #ifdef OS_LINUX /// Because of futex @@ -12,6 +13,7 @@ namespace DB SharedMutex::SharedMutex() : state(0) , waiters(0) + , writer_thread_id(0) {} void SharedMutex::lock() @@ -29,6 +31,10 @@ void SharedMutex::lock() break; } + /// The first step of acquiring the exclusive ownership is finished. + /// Now we just wait until all readers release the shared ownership. + writer_thread_id.store(getThreadId()); + value |= writers; while (value & readers) futexWaitLowerFetch(state, value); @@ -37,11 +43,15 @@ void SharedMutex::lock() bool SharedMutex::try_lock() { UInt64 value = 0; - return state.compare_exchange_strong(value, writers); + bool success = state.compare_exchange_strong(value, writers); + if (success) + writer_thread_id.store(getThreadId()); + return success; } void SharedMutex::unlock() { + writer_thread_id.store(0); state.store(0); if (waiters) futexWakeUpperAll(state); diff --git a/src/Common/SharedMutex.h b/src/Common/SharedMutex.h index 9215ff62af3..d2947645eca 100644 --- a/src/Common/SharedMutex.h +++ b/src/Common/SharedMutex.h @@ -19,6 +19,8 @@ public: ~SharedMutex() = default; SharedMutex(const SharedMutex &) = delete; SharedMutex & operator=(const SharedMutex &) = delete; + SharedMutex(SharedMutex &&) = delete; + SharedMutex & operator=(SharedMutex &&) = delete; // Exclusive ownership void lock() TSA_ACQUIRE(); @@ -36,6 +38,8 @@ private: alignas(64) std::atomic state; std::atomic waiters; + /// Is set while the lock is held (or is in the process of being acquired) in exclusive mode only to facilitate debugging + std::atomic writer_thread_id; }; } diff --git a/src/Common/SignalHandlers.cpp b/src/Common/SignalHandlers.cpp index a4b7784df5c..e025e49e0a3 100644 --- a/src/Common/SignalHandlers.cpp +++ b/src/Common/SignalHandlers.cpp @@ -89,7 +89,7 @@ void signalHandler(int sig, siginfo_t * info, void * context) writePODBinary(*info, out); writePODBinary(signal_context, out); writePODBinary(stack_trace, out); - writeVectorBinary(Exception::enable_job_stack_trace ? Exception::thread_frame_pointers : std::vector{}, out); + writeVectorBinary(Exception::enable_job_stack_trace ? Exception::getThreadFramePointers() : std::vector{}, out); writeBinary(static_cast(getThreadId()), out); writePODBinary(current_thread, out); @@ -605,7 +605,14 @@ void HandledSignals::reset() HandledSignals::~HandledSignals() { - reset(); + try + { + reset(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } }; HandledSignals & HandledSignals::instance() diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 34f6f0b7535..76277cbc993 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -489,13 +489,25 @@ struct CacheEntry using CacheEntryPtr = std::shared_ptr; -using StackTraceCache = std::map>; +static constinit bool can_use_cache = false; -static StackTraceCache & cacheInstance() +using StackTraceCacheBase = std::map>; + +struct StackTraceCache : public StackTraceCacheBase { - static StackTraceCache cache; - return cache; -} + StackTraceCache() + : StackTraceCacheBase() + { + can_use_cache = true; + } + + ~StackTraceCache() + { + can_use_cache = false; + } +}; + +static StackTraceCache cache; static DB::SharedMutex stacktrace_cache_mutex; @@ -503,10 +515,16 @@ String toStringCached(const StackTrace::FramePointers & pointers, size_t offset, { const StackTraceRefTriple key{pointers, offset, size}; + if (!can_use_cache) + { + DB::WriteBufferFromOwnString out; + toStringEveryLineImpl(false, key, [&](std::string_view str) { out << str << '\n'; }); + return out.str(); + } + /// Calculation of stack trace text is extremely slow. /// We use cache because otherwise the server could be overloaded by trash queries. /// Note that this cache can grow unconditionally, but practically it should be small. - StackTraceCache & cache = cacheInstance(); CacheEntryPtr cache_entry; // Optimistic try for cache hit to avoid any contention whatsoever, should be the main hot code route @@ -558,7 +576,7 @@ std::string StackTrace::toString(void * const * frame_pointers_raw, size_t offse void StackTrace::dropCache() { std::lock_guard lock{stacktrace_cache_mutex}; - cacheInstance().clear(); + cache.clear(); } diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 01f561d573f..c8f1ae99969 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -51,7 +51,7 @@ public: if (!capture_frame_pointers) return; /// Save all previous jobs call stacks and append with current - frame_pointers = DB::Exception::thread_frame_pointers; + frame_pointers = DB::Exception::getThreadFramePointers(); frame_pointers.push_back(StackTrace().getFramePointers()); } @@ -455,7 +455,7 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ try { if (DB::Exception::enable_job_stack_trace) - DB::Exception::thread_frame_pointers = std::move(job_data->frame_pointers); + DB::Exception::setThreadFramePointers(std::move(job_data->frame_pointers)); CurrentMetrics::Increment metric_active_pool_threads(metric_active_threads); diff --git a/src/Common/TimerDescriptor.cpp b/src/Common/TimerDescriptor.cpp index 248febe226e..b36ea4059cb 100644 --- a/src/Common/TimerDescriptor.cpp +++ b/src/Common/TimerDescriptor.cpp @@ -1,11 +1,12 @@ #if defined(OS_LINUX) + #include #include #include -#include #include + namespace DB { @@ -13,21 +14,18 @@ namespace ErrorCodes { extern const int CANNOT_CREATE_TIMER; extern const int CANNOT_SET_TIMER_PERIOD; - extern const int CANNOT_FCNTL; extern const int CANNOT_READ_FROM_SOCKET; } -TimerDescriptor::TimerDescriptor(int clockid, int flags) +TimerDescriptor::TimerDescriptor() { - timer_fd = timerfd_create(clockid, flags); + timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC); if (timer_fd == -1) - throw Exception(ErrorCodes::CANNOT_CREATE_TIMER, "Cannot create timer_fd descriptor"); - - if (-1 == fcntl(timer_fd, F_SETFL, O_NONBLOCK)) - throw ErrnoException(ErrorCodes::CANNOT_FCNTL, "Cannot set O_NONBLOCK for timer_fd"); + throw ErrnoException(ErrorCodes::CANNOT_CREATE_TIMER, "Cannot create timer_fd descriptor"); } -TimerDescriptor::TimerDescriptor(TimerDescriptor && other) noexcept : timer_fd(other.timer_fd) +TimerDescriptor::TimerDescriptor(TimerDescriptor && other) noexcept + : timer_fd(other.timer_fd) { other.timer_fd = -1; } @@ -40,21 +38,19 @@ TimerDescriptor & TimerDescriptor::operator=(DB::TimerDescriptor && other) noexc TimerDescriptor::~TimerDescriptor() { - /// Do not check for result cause cannot throw exception. if (timer_fd != -1) { - int err = close(timer_fd); - chassert(!err || errno == EINTR); + if (0 != ::close(timer_fd)) + std::terminate(); } } void TimerDescriptor::reset() const { - itimerspec spec; - spec.it_interval.tv_nsec = 0; - spec.it_interval.tv_sec = 0; - spec.it_value.tv_sec = 0; - spec.it_value.tv_nsec = 0; + if (timer_fd == -1) + return; + + itimerspec spec{}; if (-1 == timerfd_settime(timer_fd, 0 /*relative timer */, &spec, nullptr)) throw ErrnoException(ErrorCodes::CANNOT_SET_TIMER_PERIOD, "Cannot reset timer_fd"); @@ -66,25 +62,46 @@ void TimerDescriptor::reset() const void TimerDescriptor::drain() const { + if (timer_fd == -1) + return; + /// It is expected that socket returns 8 bytes when readable. /// Read in loop anyway cause signal may interrupt read call. + + /// man timerfd_create: + /// If the timer has already expired one or more times since its settings were last modified using timerfd_settime(), + /// or since the last successful read(2), then the buffer given to read(2) returns an unsigned 8-byte integer (uint64_t) + /// containing the number of expirations that have occurred. + /// (The returned value is in host byte order—that is, the native byte order for integers on the host machine.) uint64_t buf; while (true) { ssize_t res = ::read(timer_fd, &buf, sizeof(buf)); if (res < 0) { + /// man timerfd_create: + /// If no timer expirations have occurred at the time of the read(2), + /// then the call either blocks until the next timer expiration, or fails with the error EAGAIN + /// if the file descriptor has been made nonblocking + /// (via the use of the fcntl(2) F_SETFL operation to set the O_NONBLOCK flag). if (errno == EAGAIN) break; - if (errno != EINTR) - throw ErrnoException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot drain timer_fd"); + /// A signal happened, need to retry. + if (errno == EINTR) + continue; + + throw ErrnoException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot drain timer_fd"); } + + chassert(res == sizeof(buf)); } } void TimerDescriptor::setRelative(uint64_t usec) const { + chassert(timer_fd >= 0); + static constexpr uint32_t TIMER_PRECISION = 1e6; itimerspec spec; @@ -103,4 +120,5 @@ void TimerDescriptor::setRelative(Poco::Timespan timespan) const } } + #endif diff --git a/src/Common/TimerDescriptor.h b/src/Common/TimerDescriptor.h index 0292f85d770..8d3b22868ac 100644 --- a/src/Common/TimerDescriptor.h +++ b/src/Common/TimerDescriptor.h @@ -12,7 +12,7 @@ private: int timer_fd; public: - explicit TimerDescriptor(int clockid = CLOCK_MONOTONIC, int flags = 0); + TimerDescriptor(); ~TimerDescriptor(); TimerDescriptor(const TimerDescriptor &) = delete; diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index 2c6cbc4a5d5..ce7489a33e5 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -548,7 +548,7 @@ public: virtual bool isExpired() const = 0; /// Get the current connected node idx. - virtual Int8 getConnectedNodeIdx() const = 0; + virtual std::optional getConnectedNodeIdx() const = 0; /// Get the current connected host and port. virtual String getConnectedHostPort() const = 0; diff --git a/src/Common/ZooKeeper/TestKeeper.h b/src/Common/ZooKeeper/TestKeeper.h index 2194ad015bf..562c313ac0e 100644 --- a/src/Common/ZooKeeper/TestKeeper.h +++ b/src/Common/ZooKeeper/TestKeeper.h @@ -39,7 +39,7 @@ public: ~TestKeeper() override; bool isExpired() const override { return expired; } - Int8 getConnectedNodeIdx() const override { return 0; } + std::optional getConnectedNodeIdx() const override { return 0; } String getConnectedHostPort() const override { return "TestKeeper:0000"; } int32_t getConnectionXid() const override { return 0; } int64_t getSessionID() const override { return 0; } diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 01bb508da95..064ac2261ec 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -128,16 +128,15 @@ void ZooKeeper::init(ZooKeeperArgs args_, std::unique_ptr ShuffleHosts shuffled_hosts = shuffleHosts(); impl = std::make_unique(shuffled_hosts, args, zk_log); - Int8 node_idx = impl->getConnectedNodeIdx(); + auto node_idx = impl->getConnectedNodeIdx(); if (args.chroot.empty()) LOG_TRACE(log, "Initialized, hosts: {}", fmt::join(args.hosts, ",")); else LOG_TRACE(log, "Initialized, hosts: {}, chroot: {}", fmt::join(args.hosts, ","), args.chroot); - /// If the balancing strategy has an optimal node then it will be the first in the list - bool connected_to_suboptimal_node = node_idx != shuffled_hosts[0].original_index; + bool connected_to_suboptimal_node = node_idx && static_cast(*node_idx) != shuffled_hosts[0].original_index; bool respect_az = args.prefer_local_availability_zone && !args.client_availability_zone.empty(); bool may_benefit_from_reconnecting = respect_az || args.get_priority_load_balancing.hasOptimalNode(); if (connected_to_suboptimal_node && may_benefit_from_reconnecting) @@ -145,7 +144,7 @@ void ZooKeeper::init(ZooKeeperArgs args_, std::unique_ptr auto reconnect_timeout_sec = getSecondsUntilReconnect(args); LOG_DEBUG(log, "Connected to a suboptimal ZooKeeper host ({}, index {})." " To preserve balance in ZooKeeper usage, this ZooKeeper session will expire in {} seconds", - impl->getConnectedHostPort(), node_idx, reconnect_timeout_sec); + impl->getConnectedHostPort(), *node_idx, reconnect_timeout_sec); auto reconnect_task_holder = DB::Context::getGlobalContextInstance()->getSchedulePool().createTask("ZKReconnect", [this, optimal_host = shuffled_hosts[0]]() { @@ -154,13 +153,15 @@ void ZooKeeper::init(ZooKeeperArgs args_, std::unique_ptr LOG_DEBUG(log, "Trying to connect to a more optimal node {}", optimal_host.host); ShuffleHosts node{optimal_host}; std::unique_ptr new_impl = std::make_unique(node, args, zk_log); - Int8 new_node_idx = new_impl->getConnectedNodeIdx(); + + auto new_node_idx = new_impl->getConnectedNodeIdx(); + chassert(new_node_idx.has_value()); /// Maybe the node was unavailable when getting AZs first time, update just in case - if (args.availability_zone_autodetect && availability_zones[new_node_idx].empty()) + if (args.availability_zone_autodetect && availability_zones[*new_node_idx].empty()) { - availability_zones[new_node_idx] = new_impl->tryGetAvailabilityZone(); - LOG_DEBUG(log, "Got availability zone for {}: {}", optimal_host.host, availability_zones[new_node_idx]); + availability_zones[*new_node_idx] = new_impl->tryGetAvailabilityZone(); + LOG_DEBUG(log, "Got availability zone for {}: {}", optimal_host.host, availability_zones[*new_node_idx]); } optimal_impl = std::move(new_impl); @@ -1525,7 +1526,7 @@ void ZooKeeper::setServerCompletelyStarted() zk->setServerCompletelyStarted(); } -Int8 ZooKeeper::getConnectedHostIdx() const +std::optional ZooKeeper::getConnectedHostIdx() const { return impl->getConnectedNodeIdx(); } @@ -1544,10 +1545,10 @@ String ZooKeeper::getConnectedHostAvailabilityZone() const { if (args.implementation != "zookeeper" || !impl) return ""; - Int8 idx = impl->getConnectedNodeIdx(); - if (idx < 0) + std::optional idx = impl->getConnectedNodeIdx(); + if (!idx) return ""; /// session expired - return availability_zones.at(idx); + return availability_zones.at(*idx); } size_t getFailedOpIndex(Coordination::Error exception_code, const Coordination::Responses & responses) diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 4ae2cfa6096..657c9cb2c03 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -620,7 +620,7 @@ public: void setServerCompletelyStarted(); - Int8 getConnectedHostIdx() const; + std::optional getConnectedHostIdx() const; String getConnectedHostPort() const; int32_t getConnectionXid() const; diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 2728f953bea..ba622f30c91 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -536,7 +536,7 @@ void ZooKeeper::connect( compressed_out.emplace(*out, CompressionCodecFactory::instance().get("LZ4", {})); } - original_index = static_cast(node.original_index); + original_index.store(node.original_index); break; } catch (...) @@ -1014,9 +1014,6 @@ void ZooKeeper::finalize(bool error_send, bool error_receive, const String & rea LOG_INFO(log, "Finalizing session {}. finalization_started: {}, queue_finished: {}, reason: '{}'", session_id, already_started, requests_queue.isFinished(), reason); - /// Reset the original index. - original_index = -1; - auto expire_session_if_not_expired = [&] { /// No new requests will appear in queue after finish() @@ -1534,6 +1531,30 @@ void ZooKeeper::close() } +std::optional ZooKeeper::getConnectedNodeIdx() const +{ + int8_t res = original_index.load(); + if (res == -1) + return std::nullopt; + else + return res; +} + +String ZooKeeper::getConnectedHostPort() const +{ + auto idx = getConnectedNodeIdx(); + if (idx) + return args.hosts[*idx]; + else + return ""; +} + +int32_t ZooKeeper::getConnectionXid() const +{ + return next_xid.load(); +} + + void ZooKeeper::setZooKeeperLog(std::shared_ptr zk_log_) { /// logOperationIfNeeded(...) uses zk_log and can be called from different threads, so we have to use atomic shared_ptr diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index 0c88c35b381..39082cd14c1 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -114,13 +114,12 @@ public: ~ZooKeeper() override; - /// If expired, you can only destroy the object. All other methods will throw exception. bool isExpired() const override { return requests_queue.isFinished(); } - Int8 getConnectedNodeIdx() const override { return original_index; } - String getConnectedHostPort() const override { return (original_index == -1) ? "" : args.hosts[original_index]; } - int32_t getConnectionXid() const override { return next_xid.load(); } + std::optional getConnectedNodeIdx() const override; + String getConnectedHostPort() const override; + int32_t getConnectionXid() const override; String tryGetAvailabilityZone() override; @@ -219,7 +218,7 @@ private: ACLs default_acls; zkutil::ZooKeeperArgs args; - Int8 original_index = -1; + std::atomic original_index{-1}; /// Fault injection void maybeInjectSendFault(); diff --git a/src/Common/assert_cast.h b/src/Common/assert_cast.h index f9d0bf0e595..7a04372ffad 100644 --- a/src/Common/assert_cast.h +++ b/src/Common/assert_cast.h @@ -25,7 +25,7 @@ namespace DB template inline To assert_cast(From && from) { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD try { if constexpr (std::is_pointer_v) diff --git a/src/Common/config.h.in b/src/Common/config.h.in index f68701d5d10..6a0090130a3 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -64,6 +64,7 @@ #cmakedefine01 USE_LIBARCHIVE #cmakedefine01 USE_POCKETFFT #cmakedefine01 USE_PROMETHEUS_PROTOBUFS +#cmakedefine01 USE_NUMACTL /// This is needed for .incbin in assembly. For some reason, include paths don't work there in presence of LTO. /// That's why we use absolute paths. diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index c133e9f5617..69580d4ad0e 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -1,14 +1,14 @@ clickhouse_add_executable (hashes_test hashes_test.cpp) -target_link_libraries (hashes_test PRIVATE clickhouse_common_io ch_contrib::cityhash) +target_link_libraries (hashes_test PRIVATE clickhouse_common_io clickhouse_common_config ch_contrib::cityhash) if (TARGET OpenSSL::Crypto) target_link_libraries (hashes_test PRIVATE OpenSSL::Crypto) endif() clickhouse_add_executable (sip_hash_perf sip_hash_perf.cpp) -target_link_libraries (sip_hash_perf PRIVATE clickhouse_common_io) +target_link_libraries (sip_hash_perf PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (small_table small_table.cpp) -target_link_libraries (small_table PRIVATE clickhouse_common_io) +target_link_libraries (small_table PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (parallel_aggregation parallel_aggregation.cpp) target_link_libraries (parallel_aggregation PRIVATE dbms clickhouse_functions) @@ -17,13 +17,13 @@ clickhouse_add_executable (parallel_aggregation2 parallel_aggregation2.cpp) target_link_libraries (parallel_aggregation2 PRIVATE dbms clickhouse_functions) clickhouse_add_executable (int_hashes_perf int_hashes_perf.cpp) -target_link_libraries (int_hashes_perf PRIVATE clickhouse_common_io) +target_link_libraries (int_hashes_perf PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (compact_array compact_array.cpp) -target_link_libraries (compact_array PRIVATE clickhouse_common_io) +target_link_libraries (compact_array PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (radix_sort radix_sort.cpp) -target_link_libraries (radix_sort PRIVATE clickhouse_common_io ch_contrib::pdqsort) +target_link_libraries (radix_sort PRIVATE clickhouse_common_io clickhouse_common_config ch_contrib::pdqsort) clickhouse_add_executable (arena_with_free_lists arena_with_free_lists.cpp) target_link_libraries (arena_with_free_lists PRIVATE dbms) @@ -33,46 +33,46 @@ target_link_libraries (lru_hash_map_perf PRIVATE dbms) if (OS_LINUX) clickhouse_add_executable (thread_creation_latency thread_creation_latency.cpp) - target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io) + target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io clickhouse_common_config) endif() clickhouse_add_executable (array_cache array_cache.cpp) -target_link_libraries (array_cache PRIVATE clickhouse_common_io) +target_link_libraries (array_cache PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (space_saving space_saving.cpp) -target_link_libraries (space_saving PRIVATE clickhouse_common_io) +target_link_libraries (space_saving PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (integer_hash_tables_benchmark integer_hash_tables_benchmark.cpp) target_link_libraries (integer_hash_tables_benchmark PRIVATE dbms ch_contrib::abseil_swiss_tables ch_contrib::sparsehash) clickhouse_add_executable (cow_columns cow_columns.cpp) -target_link_libraries (cow_columns PRIVATE clickhouse_common_io) +target_link_libraries (cow_columns PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (cow_compositions cow_compositions.cpp) -target_link_libraries (cow_compositions PRIVATE clickhouse_common_io) +target_link_libraries (cow_compositions PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (stopwatch stopwatch.cpp) -target_link_libraries (stopwatch PRIVATE clickhouse_common_io) +target_link_libraries (stopwatch PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (symbol_index symbol_index.cpp) -target_link_libraries (symbol_index PRIVATE clickhouse_common_io) +target_link_libraries (symbol_index PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (chaos_sanitizer chaos_sanitizer.cpp) -target_link_libraries (chaos_sanitizer PRIVATE clickhouse_common_io) +target_link_libraries (chaos_sanitizer PRIVATE clickhouse_common_io clickhouse_common_config) if (OS_LINUX) clickhouse_add_executable (memory_statistics_os_perf memory_statistics_os_perf.cpp) - target_link_libraries (memory_statistics_os_perf PRIVATE clickhouse_common_io) + target_link_libraries (memory_statistics_os_perf PRIVATE clickhouse_common_io clickhouse_common_config) endif() clickhouse_add_executable (procfs_metrics_provider_perf procfs_metrics_provider_perf.cpp) -target_link_libraries (procfs_metrics_provider_perf PRIVATE clickhouse_common_io) +target_link_libraries (procfs_metrics_provider_perf PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (average average.cpp) -target_link_libraries (average PRIVATE clickhouse_common_io) +target_link_libraries (average PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (shell_command_inout shell_command_inout.cpp) -target_link_libraries (shell_command_inout PRIVATE clickhouse_common_io) +target_link_libraries (shell_command_inout PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (executable_udf executable_udf.cpp) target_link_libraries (executable_udf PRIVATE dbms) @@ -91,4 +91,4 @@ if (ENABLE_SSL) endif() clickhouse_add_executable (check_pointer_valid check_pointer_valid.cpp) -target_link_libraries (check_pointer_valid PRIVATE clickhouse_common_io) +target_link_libraries (check_pointer_valid PRIVATE clickhouse_common_io clickhouse_common_config) diff --git a/src/Common/getRandomASCIIString.cpp b/src/Common/getRandomASCIIString.cpp index 594b4cd3228..a295277b453 100644 --- a/src/Common/getRandomASCIIString.cpp +++ b/src/Common/getRandomASCIIString.cpp @@ -6,12 +6,17 @@ namespace DB { String getRandomASCIIString(size_t length) +{ + return getRandomASCIIString(length, thread_local_rng); +} + +String getRandomASCIIString(size_t length, pcg64 & rng) { std::uniform_int_distribution distribution('a', 'z'); String res; res.resize(length); for (auto & c : res) - c = distribution(thread_local_rng); + c = distribution(rng); return res; } diff --git a/src/Common/getRandomASCIIString.h b/src/Common/getRandomASCIIString.h index 627d2700ce3..19e1ff7120e 100644 --- a/src/Common/getRandomASCIIString.h +++ b/src/Common/getRandomASCIIString.h @@ -2,11 +2,14 @@ #include +#include + namespace DB { /// Slow random string. Useful for random names and things like this. Not for generating data. String getRandomASCIIString(size_t length); +String getRandomASCIIString(size_t length, pcg64 & rng); } diff --git a/src/Common/mysqlxx/tests/CMakeLists.txt b/src/Common/mysqlxx/tests/CMakeLists.txt index c560d0e3153..53bee778470 100644 --- a/src/Common/mysqlxx/tests/CMakeLists.txt +++ b/src/Common/mysqlxx/tests/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable (mysqlxx_pool_test mysqlxx_pool_test.cpp) -target_link_libraries (mysqlxx_pool_test PRIVATE mysqlxx) +target_link_libraries (mysqlxx_pool_test PRIVATE mysqlxx clickhouse_common_config loggers_no_text_log) diff --git a/src/Common/tests/gtest_cgroups_reader.cpp b/src/Common/tests/gtest_cgroups_reader.cpp new file mode 100644 index 00000000000..2de25bb42ce --- /dev/null +++ b/src/Common/tests/gtest_cgroups_reader.cpp @@ -0,0 +1,178 @@ +#if defined(OS_LINUX) + +#include + +#include +#include + +#include +#include +#include + +using namespace DB; + + +const std::string SAMPLE_FILE[2] = { + R"(cache 4673703936 +rss 2232029184 +rss_huge 0 +shmem 0 +mapped_file 344678400 +dirty 4730880 +writeback 135168 +swap 0 +pgpgin 2038569918 +pgpgout 2036883790 +pgfault 2055373287 +pgmajfault 0 +inactive_anon 2156335104 +active_anon 0 +inactive_file 2841305088 +active_file 1653915648 +unevictable 256008192 +hierarchical_memory_limit 8589934592 +hierarchical_memsw_limit 8589934592 +total_cache 4673703936 +total_rss 2232029184 +total_rss_huge 0 +total_shmem 0 +total_mapped_file 344678400 +total_dirty 4730880 +total_writeback 135168 +total_swap 0 +total_pgpgin 2038569918 +total_pgpgout 2036883790 +total_pgfault 2055373287 +total_pgmajfault 0 +total_inactive_anon 2156335104 +total_active_anon 0 +total_inactive_file 2841305088 +total_active_file 1653915648 +total_unevictable 256008192 +)", + R"(anon 10429399040 +file 17410793472 +kernel 1537789952 +kernel_stack 3833856 +pagetables 65441792 +sec_pagetables 0 +percpu 15232 +sock 0 +vmalloc 0 +shmem 0 +zswap 0 +zswapped 0 +file_mapped 344010752 +file_dirty 2060857344 +file_writeback 0 +swapcached 0 +anon_thp 0 +file_thp 0 +shmem_thp 0 +inactive_anon 0 +active_anon 10429370368 +inactive_file 8693084160 +active_file 8717561856 +unevictable 0 +slab_reclaimable 1460982504 +slab_unreclaimable 5152864 +slab 1466135368 +workingset_refault_anon 0 +workingset_refault_file 0 +workingset_activate_anon 0 +workingset_activate_file 0 +workingset_restore_anon 0 +workingset_restore_file 0 +workingset_nodereclaim 0 +pgscan 0 +pgsteal 0 +pgscan_kswapd 0 +pgscan_direct 0 +pgscan_khugepaged 0 +pgsteal_kswapd 0 +pgsteal_direct 0 +pgsteal_khugepaged 0 +pgfault 43026352 +pgmajfault 36762 +pgrefill 0 +pgactivate 0 +pgdeactivate 0 +pglazyfree 259 +pglazyfreed 0 +zswpin 0 +zswpout 0 +thp_fault_alloc 0 +thp_collapse_alloc 0 +)"}; + +const std::string EXPECTED[2] + = {"{\"active_anon\": 0, \"active_file\": 1653915648, \"cache\": 4673703936, \"dirty\": 4730880, \"hierarchical_memory_limit\": " + "8589934592, \"hierarchical_memsw_limit\": 8589934592, \"inactive_anon\": 2156335104, \"inactive_file\": 2841305088, " + "\"mapped_file\": 344678400, \"pgfault\": 2055373287, \"pgmajfault\": 0, \"pgpgin\": 2038569918, \"pgpgout\": 2036883790, \"rss\": " + "2232029184, \"rss_huge\": 0, \"shmem\": 0, \"swap\": 0, \"total_active_anon\": 0, \"total_active_file\": 1653915648, " + "\"total_cache\": 4673703936, \"total_dirty\": 4730880, \"total_inactive_anon\": 2156335104, \"total_inactive_file\": 2841305088, " + "\"total_mapped_file\": 344678400, \"total_pgfault\": 2055373287, \"total_pgmajfault\": 0, \"total_pgpgin\": 2038569918, " + "\"total_pgpgout\": 2036883790, \"total_rss\": 2232029184, \"total_rss_huge\": 0, \"total_shmem\": 0, \"total_swap\": 0, " + "\"total_unevictable\": 256008192, \"total_writeback\": 135168, \"unevictable\": 256008192, \"writeback\": 135168}", + "{\"active_anon\": 10429370368, \"active_file\": 8717561856, \"anon\": 10429399040, \"anon_thp\": 0, \"file\": 17410793472, " + "\"file_dirty\": 2060857344, \"file_mapped\": 344010752, \"file_thp\": 0, \"file_writeback\": 0, \"inactive_anon\": 0, " + "\"inactive_file\": 8693084160, \"kernel\": 1537789952, \"kernel_stack\": 3833856, \"pagetables\": 65441792, \"percpu\": 15232, " + "\"pgactivate\": 0, \"pgdeactivate\": 0, \"pgfault\": 43026352, \"pglazyfree\": 259, \"pglazyfreed\": 0, \"pgmajfault\": 36762, " + "\"pgrefill\": 0, \"pgscan\": 0, \"pgscan_direct\": 0, \"pgscan_khugepaged\": 0, \"pgscan_kswapd\": 0, \"pgsteal\": 0, " + "\"pgsteal_direct\": 0, \"pgsteal_khugepaged\": 0, \"pgsteal_kswapd\": 0, \"sec_pagetables\": 0, \"shmem\": 0, \"shmem_thp\": 0, " + "\"slab\": 1466135368, \"slab_reclaimable\": 1460982504, \"slab_unreclaimable\": 5152864, \"sock\": 0, \"swapcached\": 0, " + "\"thp_collapse_alloc\": 0, \"thp_fault_alloc\": 0, \"unevictable\": 0, \"vmalloc\": 0, \"workingset_activate_anon\": 0, " + "\"workingset_activate_file\": 0, \"workingset_nodereclaim\": 0, \"workingset_refault_anon\": 0, \"workingset_refault_file\": 0, " + "\"workingset_restore_anon\": 0, \"workingset_restore_file\": 0, \"zswap\": 0, \"zswapped\": 0, \"zswpin\": 0, \"zswpout\": 0}"}; + + +class CgroupsMemoryUsageObserverFixture : public ::testing::TestWithParam +{ + void SetUp() override + { + const uint8_t version = static_cast(GetParam()); + tmp_dir = fmt::format("./test_cgroups_{}", magic_enum::enum_name(GetParam())); + fs::create_directories(tmp_dir); + + auto stat_file = WriteBufferFromFile(tmp_dir + "/memory.stat"); + stat_file.write(SAMPLE_FILE[version].data(), SAMPLE_FILE[version].size()); + stat_file.sync(); + + if (GetParam() == CgroupsMemoryUsageObserver::CgroupsVersion::V2) + { + auto current_file = WriteBufferFromFile(tmp_dir + "/memory.current"); + current_file.write("29645422592", 11); + current_file.sync(); + } + } + +protected: + std::string tmp_dir; +}; + + +TEST_P(CgroupsMemoryUsageObserverFixture, ReadMemoryUsageTest) +{ + const auto version = GetParam(); + auto reader = createCgroupsReader(version, tmp_dir); + ASSERT_EQ( + reader->readMemoryUsage(), + version == CgroupsMemoryUsageObserver::CgroupsVersion::V1 ? /* rss from memory.stat */ 2232029184 + : /* value from memory.current - inactive_file */ 20952338432); +} + + +TEST_P(CgroupsMemoryUsageObserverFixture, DumpAllStatsTest) +{ + const auto version = GetParam(); + auto reader = createCgroupsReader(version, tmp_dir); + ASSERT_EQ(reader->dumpAllStats(), EXPECTED[static_cast(version)]); +} + + +INSTANTIATE_TEST_SUITE_P( + CgroupsMemoryUsageObserverTests, + CgroupsMemoryUsageObserverFixture, + ::testing::Values(CgroupsMemoryUsageObserver::CgroupsVersion::V1, CgroupsMemoryUsageObserver::CgroupsVersion::V2)); + +#endif diff --git a/src/Common/tests/gtest_resolve_pool.cpp b/src/Common/tests/gtest_resolve_pool.cpp index b760b9b1524..c443e961cc7 100644 --- a/src/Common/tests/gtest_resolve_pool.cpp +++ b/src/Common/tests/gtest_resolve_pool.cpp @@ -33,7 +33,7 @@ size_t toMilliseconds(auto duration) return std::chrono::duration_cast(duration).count(); } -const auto epsilon = 500us; +const auto epsilon = 1ms; class ResolvePoolMock : public DB::HostResolver { @@ -358,53 +358,59 @@ void check_no_failed_address(size_t iteration, auto & resolver, auto & addresses TEST_F(ResolvePoolTest, BannedForConsiquenceFail) { - auto history = 5ms; + auto history = 10ms; auto resolver = make_resolver(toMilliseconds(history)); auto failed_addr = resolver->resolve(); ASSERT_TRUE(addresses.contains(*failed_addr)); - auto start_at = now(); failed_addr.setFail(); + auto start_at = now(); + ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count)); ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count)); check_no_failed_address(1, resolver, addresses, failed_addr, metrics, start_at + history - epsilon); sleep_until(start_at + history + epsilon); - start_at = now(); resolver->update(); ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count)); ASSERT_EQ(0, CurrentMetrics::get(metrics.banned_count)); failed_addr.setFail(); + start_at = now(); + check_no_failed_address(2, resolver, addresses, failed_addr, metrics, start_at + history - epsilon); sleep_until(start_at + history + epsilon); - start_at = now(); resolver->update(); + + // too much time has passed + if (now() > start_at + 2*history - epsilon) + return; + ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count)); ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count)); // ip still banned adter history_ms + update, because it was his second consiquent fail - check_no_failed_address(2, resolver, addresses, failed_addr, metrics, start_at + history - epsilon); + check_no_failed_address(2, resolver, addresses, failed_addr, metrics, start_at + 2*history - epsilon); } TEST_F(ResolvePoolTest, NoAditionalBannForConcurrentFail) { - auto history = 5ms; + auto history = 10ms; auto resolver = make_resolver(toMilliseconds(history)); auto failed_addr = resolver->resolve(); ASSERT_TRUE(addresses.contains(*failed_addr)); - auto start_at = now(); + failed_addr.setFail(); + failed_addr.setFail(); + failed_addr.setFail(); - failed_addr.setFail(); - failed_addr.setFail(); - failed_addr.setFail(); + auto start_at = now(); ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count)); ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count)); @@ -413,6 +419,7 @@ TEST_F(ResolvePoolTest, NoAditionalBannForConcurrentFail) sleep_until(start_at + history + epsilon); resolver->update(); + // ip is cleared after just 1 history_ms interval. ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count)); ASSERT_EQ(0, CurrentMetrics::get(metrics.banned_count)); diff --git a/src/Common/tests/gtest_rw_lock.cpp b/src/Common/tests/gtest_rw_lock.cpp index d8c6e9cb99d..9b0c9aeafbe 100644 --- a/src/Common/tests/gtest_rw_lock.cpp +++ b/src/Common/tests/gtest_rw_lock.cpp @@ -166,7 +166,7 @@ TEST(Common, RWLockRecursive) auto lock2 = fifo_lock->getLock(RWLockImpl::Read, "q2"); -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD /// It throws LOGICAL_ERROR EXPECT_ANY_THROW({fifo_lock->getLock(RWLockImpl::Write, "q2");}); #endif diff --git a/src/Compression/examples/CMakeLists.txt b/src/Compression/examples/CMakeLists.txt index a7cc6bebf42..fee8cf89942 100644 --- a/src/Compression/examples/CMakeLists.txt +++ b/src/Compression/examples/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable (compressed_buffer compressed_buffer.cpp) -target_link_libraries (compressed_buffer PRIVATE clickhouse_common_io clickhouse_compression) +target_link_libraries (compressed_buffer PRIVATE clickhouse_common_io clickhouse_common_config clickhouse_compression) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index d40e5ef2e50..f09ea56391a 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -383,7 +383,10 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co LockMemoryExceptionInThread::removeUniqueLock(); }; - asio_opts.thread_pool_size_ = getNumberOfPhysicalCPUCores(); + /// At least 16 threads for network communication in asio. + /// asio is async framework, so even with 1 thread it should be ok, but + /// still as safeguard it's better to have some redundant capacity here + asio_opts.thread_pool_size_ = std::max(16U, getNumberOfPhysicalCPUCores()); if (state_manager->isSecure()) { diff --git a/src/Core/InterpolateDescription.cpp b/src/Core/InterpolateDescription.cpp index d828c2e85e9..86681fdb591 100644 --- a/src/Core/InterpolateDescription.cpp +++ b/src/Core/InterpolateDescription.cpp @@ -13,10 +13,10 @@ namespace DB { - InterpolateDescription::InterpolateDescription(ActionsDAGPtr actions_, const Aliases & aliases) - : actions(actions_) + InterpolateDescription::InterpolateDescription(ActionsDAG actions_, const Aliases & aliases) + : actions(std::move(actions_)) { - for (const auto & name_type : actions->getRequiredColumns()) + for (const auto & name_type : actions.getRequiredColumns()) { if (const auto & p = aliases.find(name_type.name); p != aliases.end()) required_columns_map[p->second->getColumnName()] = name_type; @@ -24,7 +24,7 @@ namespace DB required_columns_map[name_type.name] = name_type; } - for (const ColumnWithTypeAndName & column : actions->getResultColumns()) + for (const ColumnWithTypeAndName & column : actions.getResultColumns()) { std::string name = column.name; if (const auto & p = aliases.find(name); p != aliases.end()) diff --git a/src/Core/InterpolateDescription.h b/src/Core/InterpolateDescription.h index 62d7120508b..eeead71d780 100644 --- a/src/Core/InterpolateDescription.h +++ b/src/Core/InterpolateDescription.h @@ -5,21 +5,20 @@ #include #include #include +#include namespace DB { -class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; using Aliases = std::unordered_map; /// Interpolate description struct InterpolateDescription { - explicit InterpolateDescription(ActionsDAGPtr actions, const Aliases & aliases); + explicit InterpolateDescription(ActionsDAG actions, const Aliases & aliases); - ActionsDAGPtr actions; + ActionsDAG actions; std::unordered_map required_columns_map; /// input column name -> {alias, type} std::unordered_set result_columns_set; /// result block columns diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 3d181e33001..27b71558bd3 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1,5 +1,8 @@ #pragma once +/// CLion freezes for a minute on every keypress in any file including this. +#if !defined(__CLION_IDE__) + #include #include #include @@ -183,7 +186,7 @@ class IColumn; M(Bool, allow_suspicious_ttl_expressions, false, "Reject TTL expressions that don't depend on any of table's columns. It indicates a user error most of the time.", 0) \ M(Bool, allow_suspicious_variant_types, false, "In CREATE TABLE statement allows specifying Variant type with similar variant types (for example, with different numeric or date types). Enabling this setting may introduce some ambiguity when working with values with similar types.", 0) \ M(Bool, allow_suspicious_primary_key, false, "Forbid suspicious PRIMARY KEY/ORDER BY for MergeTree (i.e. SimpleAggregateFunction)", 0) \ - M(Bool, compile_expressions, false, "Compile some scalar functions and operators to native code.", 0) \ + M(Bool, compile_expressions, false, "Compile some scalar functions and operators to native code. Due to a bug in the LLVM compiler infrastructure, on AArch64 machines, it is known to lead to a nullptr dereference and, consequently, server crash. Do not enable this setting.", 0) \ M(UInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \ M(Bool, compile_aggregate_expressions, true, "Compile aggregate functions to native code.", 0) \ M(UInt64, min_count_to_compile_aggregate_expression, 3, "The number of identical aggregate expressions before they are JIT-compiled", 0) \ @@ -602,7 +605,7 @@ class IColumn; M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \ M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \ M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \ - M(Bool, optimize_functions_to_subcolumns, true, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \ + M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \ M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \ M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \ M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \ @@ -763,7 +766,7 @@ class IColumn; M(UInt64, merge_tree_min_rows_for_concurrent_read_for_remote_filesystem, (20 * 8192), "If at least as many lines are read from one file, the reading can be parallelized, when reading from remote filesystem.", 0) \ M(UInt64, merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem, (24 * 10 * 1024 * 1024), "If at least as many bytes are read from one file, the reading can be parallelized, when reading from remote filesystem.", 0) \ M(UInt64, remote_read_min_bytes_for_seek, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes required for remote read (url, s3) to do seek, instead of read with ignore.", 0) \ - M(UInt64, merge_tree_min_bytes_per_task_for_remote_reading, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes to read per task.", 0) \ + M(UInt64, merge_tree_min_bytes_per_task_for_remote_reading, 2 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes to read per task.", 0) ALIAS(filesystem_prefetch_min_bytes_for_single_read_task) \ M(Bool, merge_tree_use_const_size_tasks_for_remote_reading, true, "Whether to use constant size tasks for reading from a remote table.", 0) \ M(Bool, merge_tree_determine_task_size_by_prewhere_columns, true, "Whether to use only prewhere columns size to determine reading task size.", 0) \ M(UInt64, merge_tree_compact_parts_min_granules_to_multibuffer_read, 16, "Only available in ClickHouse Cloud", 0) \ @@ -805,7 +808,6 @@ class IColumn; M(UInt64, prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the prefetch buffer to read from the filesystem.", 0) \ M(UInt64, filesystem_prefetch_step_bytes, 0, "Prefetch step in bytes. Zero means `auto` - approximately the best prefetch step will be auto deduced, but might not be 100% the best. The actual value might be different because of setting filesystem_prefetch_min_bytes_for_single_read_task", 0) \ M(UInt64, filesystem_prefetch_step_marks, 0, "Prefetch step in marks. Zero means `auto` - approximately the best prefetch step will be auto deduced, but might not be 100% the best. The actual value might be different because of setting filesystem_prefetch_min_bytes_for_single_read_task", 0) \ - M(UInt64, filesystem_prefetch_min_bytes_for_single_read_task, "2Mi", "Do not parallelize within one file read less than this amount of bytes. E.g. one reader will not receive a read task of size less than this amount. This setting is recommended to avoid spikes of time for aws getObject requests to aws", 0) \ M(UInt64, filesystem_prefetch_max_memory_usage, "1Gi", "Maximum memory usage for prefetches.", 0) \ M(UInt64, filesystem_prefetches_limit, 200, "Maximum number of prefetches. Zero means unlimited. A setting `filesystem_prefetches_max_memory_usage` is more recommended if you want to limit the number of prefetches", 0) \ \ @@ -891,6 +893,8 @@ class IColumn; M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \ M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \ + M(Bool, restore_replace_external_engines_to_null, false, "Replace all the external table engines to Null on restore. Useful for testing purposes", 0) \ + M(Bool, restore_replace_external_table_functions_to_null, false, "Replace all table functions to Null on restore. Useful for testing purposes", 0) \ \ \ /* ###################################### */ \ @@ -1123,7 +1127,7 @@ class IColumn; M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \ M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \ M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \ - M(Bool, input_format_json_ignore_key_case, false, "Ignore json key case while read json field from string", 0) \ + M(Bool, input_format_json_case_insensitive_column_matching, false, "Ignore case when matching JSON keys with CH columns", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ @@ -1154,7 +1158,6 @@ class IColumn; M(Bool, input_format_values_interpret_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.", 0) \ M(Bool, input_format_values_deduce_templates_of_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows.", 0) \ M(Bool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \ - M(Bool, input_format_values_allow_data_after_semicolon, false, "For Values format: allow extra data after semicolon (used by client to interpret comments).", 0) \ M(Bool, input_format_avro_allow_missing_fields, false, "For Avro/AvroConfluent format: when field is not found in schema use default value instead of error", 0) \ /** This setting is obsolete and do nothing, left for compatibility reasons. */ \ M(Bool, input_format_avro_null_as_default, false, "For Avro/AvroConfluent format: insert default in case of null and non Nullable column", 0) \ @@ -1348,3 +1351,5 @@ struct FormatFactorySettings : public BaseSettings }; } + +#endif diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index de4725dc350..8bea0b1eed3 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -63,9 +63,8 @@ static std::initializer_listgetID()); for (size_t i = argument_types_start_idx; i < arguments->children.size(); ++i) argument_types.push_back(DataTypeFactory::instance().get(arguments->children[i])); diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index af37cde2846..6f7dcd65b83 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include #include @@ -22,7 +22,6 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int UNKNOWN_TYPE; - extern const int ILLEGAL_SYNTAX_FOR_DATA_TYPE; extern const int UNEXPECTED_AST_STRUCTURE; extern const int DATA_TYPE_CANNOT_HAVE_ARGUMENTS; } @@ -83,15 +82,9 @@ DataTypePtr DataTypeFactory::tryGet(const ASTPtr & ast) const template DataTypePtr DataTypeFactory::getImpl(const ASTPtr & ast) const { - if (const auto * func = ast->as()) + if (const auto * type = ast->as()) { - if (func->parameters) - { - if constexpr (nullptr_on_error) - return nullptr; - throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE, "Data type cannot have multiple parenthesized parameters."); - } - return getImpl(func->name, func->arguments); + return getImpl(type->name, type->arguments); } if (const auto * ident = ast->as()) @@ -107,7 +100,7 @@ DataTypePtr DataTypeFactory::getImpl(const ASTPtr & ast) const if constexpr (nullptr_on_error) return nullptr; - throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected AST element for data type."); + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected AST element for data type: {}.", ast->getID()); } DataTypePtr DataTypeFactory::get(const String & family_name_param, const ASTPtr & parameters) const @@ -125,23 +118,6 @@ DataTypePtr DataTypeFactory::getImpl(const String & family_name_param, const AST { String family_name = getAliasToOrName(family_name_param); - if (endsWith(family_name, "WithDictionary")) - { - ASTPtr low_cardinality_params = std::make_shared(); - String param_name = family_name.substr(0, family_name.size() - strlen("WithDictionary")); - if (parameters) - { - auto func = std::make_shared(); - func->name = param_name; - func->arguments = parameters; - low_cardinality_params->children.push_back(func); - } - else - low_cardinality_params->children.push_back(std::make_shared(param_name)); - - return getImpl("LowCardinality", low_cardinality_params); - } - const auto * creator = findCreatorByName(family_name); if constexpr (nullptr_on_error) { diff --git a/src/DataTypes/DataTypeObject.cpp b/src/DataTypes/DataTypeObject.cpp index 5636a46373f..91b9bfcb2a5 100644 --- a/src/DataTypes/DataTypeObject.cpp +++ b/src/DataTypes/DataTypeObject.cpp @@ -4,9 +4,10 @@ #include #include -#include +#include #include + namespace DB { @@ -53,13 +54,13 @@ static DataTypePtr create(const ASTPtr & arguments) ASTPtr schema_argument = arguments->children[0]; bool is_nullable = false; - if (const auto * func = schema_argument->as()) + if (const auto * type = schema_argument->as()) { - if (func->name != "Nullable" || func->arguments->children.size() != 1) + if (type->name != "Nullable" || type->arguments->children.size() != 1) throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, - "Expected 'Nullable()' as parameter for type Object (function: {})", func->name); + "Expected 'Nullable()' as parameter for type Object (function: {})", type->name); - schema_argument = func->arguments->children[0]; + schema_argument = type->arguments->children[0]; is_nullable = true; } diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index d81dc7a76d8..d149b49d465 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -80,13 +80,20 @@ namespace /// CREATE TABLE or CREATE DICTIONARY or CREATE VIEW or CREATE TEMPORARY TABLE or CREATE DATABASE query. void visitCreateQuery(const ASTCreateQuery & create) { - QualifiedTableName to_table{create.to_table_id.database_name, create.to_table_id.table_name}; - if (!to_table.table.empty()) + if (create.targets) { - /// TO target_table (for materialized views) - if (to_table.database.empty()) - to_table.database = current_database; - dependencies.emplace(to_table); + for (const auto & target : create.targets->targets) + { + const auto & table_id = target.table_id; + if (!table_id.table_name.empty()) + { + /// TO target_table (for materialized views) + QualifiedTableName target_name{table_id.database_name, table_id.table_name}; + if (target_name.database.empty()) + target_name.database = current_database; + dependencies.emplace(target_name); + } + } } QualifiedTableName as_table{create.as_database, create.as_table}; diff --git a/src/Databases/DDLRenamingVisitor.cpp b/src/Databases/DDLRenamingVisitor.cpp index 6cd414635a0..38e100e2470 100644 --- a/src/Databases/DDLRenamingVisitor.cpp +++ b/src/Databases/DDLRenamingVisitor.cpp @@ -86,12 +86,19 @@ namespace create.as_table = as_table_new.table; } - QualifiedTableName to_table{create.to_table_id.database_name, create.to_table_id.table_name}; - if (!to_table.table.empty() && !to_table.database.empty()) + if (create.targets) { - auto to_table_new = data.renaming_map.getNewTableName(to_table); - if (to_table_new != to_table) - create.to_table_id = StorageID{to_table_new.database, to_table_new.table}; + for (auto & target : create.targets->targets) + { + auto & table_id = target.table_id; + if (!table_id.database_name.empty() && !table_id.table_name.empty()) + { + QualifiedTableName target_name{table_id.database_name, table_id.table_name}; + auto new_target_name = data.renaming_map.getNewTableName(target_name); + if (new_target_name != target_name) + table_id = StorageID{new_target_name.database, new_target_name.table}; + } + } } } diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 5017c9b25cb..ca30ee6db15 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -195,7 +195,7 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n snapshot_detached_tables.erase(table_name); } - CurrentMetrics::add(CurrentMetrics::AttachedTable, 1); + CurrentMetrics::add(CurrentMetrics::AttachedTable); } StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & table_name) @@ -221,7 +221,7 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta .metadata_path = getObjectMetadataPath(table_name), .is_permanently = false}); - CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1); + CurrentMetrics::sub(CurrentMetrics::AttachedTable); } return res; } diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 7ce2859e962..f127ccbc224 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -559,8 +559,11 @@ void DatabaseReplicated::createEmptyLogEntry(const ZooKeeperPtr & current_zookee bool DatabaseReplicated::waitForReplicaToProcessAllEntries(UInt64 timeout_ms) { - if (!ddl_worker || is_probably_dropped) - return false; + { + std::lock_guard lock{ddl_worker_mutex}; + if (!ddl_worker || is_probably_dropped) + return false; + } return ddl_worker->waitForReplicaToProcessAllEntries(timeout_ms); } @@ -641,12 +644,16 @@ LoadTaskPtr DatabaseReplicated::startupDatabaseAsync(AsyncLoader & async_loader, if (is_probably_dropped) return; - ddl_worker = std::make_unique(this, getContext()); - ddl_worker->startup(); - ddl_worker_initialized = true; + { + std::lock_guard lock{ddl_worker_mutex}; + ddl_worker = std::make_unique(this, getContext()); + ddl_worker->startup(); + ddl_worker_initialized = true; + } }); std::scoped_lock lock(mutex); - return startup_replicated_database_task = makeLoadTask(async_loader, {job}); + startup_replicated_database_task = makeLoadTask(async_loader, {job}); + return startup_replicated_database_task; } void DatabaseReplicated::waitDatabaseStarted() const @@ -671,6 +678,178 @@ void DatabaseReplicated::stopLoading() DatabaseAtomic::stopLoading(); } +void DatabaseReplicated::dumpLocalTablesForDebugOnly(const ContextPtr & local_context) const +{ + auto table_names = getAllTableNames(context.lock()); + for (const auto & table_name : table_names) + { + auto ast_ptr = tryGetCreateTableQuery(table_name, local_context); + if (ast_ptr) + LOG_DEBUG(log, "[local] Table {} create query is {}", table_name, queryToString(ast_ptr)); + else + LOG_DEBUG(log, "[local] Table {} has no create query", table_name); + } +} + +void DatabaseReplicated::dumpTablesInZooKeeperForDebugOnly() const +{ + UInt32 max_log_ptr; + auto table_name_to_metadata = tryGetConsistentMetadataSnapshot(getZooKeeper(), max_log_ptr); + for (const auto & [table_name, create_table_query] : table_name_to_metadata) + { + auto query_ast = parseQueryFromMetadataInZooKeeper(table_name, create_table_query); + if (query_ast) + { + LOG_DEBUG(log, "[zookeeper] Table {} create query is {}", table_name, queryToString(query_ast)); + } + else + { + LOG_DEBUG(log, "[zookeeper] Table {} has no create query", table_name); + } + } +} + +void DatabaseReplicated::tryCompareLocalAndZooKeeperTablesAndDumpDiffForDebugOnly(const ContextPtr & local_context) const +{ + UInt32 max_log_ptr; + auto table_name_to_metadata_in_zk = tryGetConsistentMetadataSnapshot(getZooKeeper(), max_log_ptr); + auto table_names_local = getAllTableNames(local_context); + + if (table_name_to_metadata_in_zk.size() != table_names_local.size()) + LOG_DEBUG(log, "Amount of tables in zk {} locally {}", table_name_to_metadata_in_zk.size(), table_names_local.size()); + + std::unordered_set checked_tables; + + for (const auto & table_name : table_names_local) + { + auto local_ast_ptr = tryGetCreateTableQuery(table_name, local_context); + if (table_name_to_metadata_in_zk.contains(table_name)) + { + checked_tables.insert(table_name); + auto create_table_query_in_zk = table_name_to_metadata_in_zk[table_name]; + auto zk_ast_ptr = parseQueryFromMetadataInZooKeeper(table_name, create_table_query_in_zk); + + if (local_ast_ptr == nullptr && zk_ast_ptr == nullptr) + { + LOG_DEBUG(log, "AST for table {} is the same (nullptr) in local and ZK", table_name); + } + else if (local_ast_ptr != nullptr && zk_ast_ptr != nullptr && queryToString(local_ast_ptr) != queryToString(zk_ast_ptr)) + { + LOG_DEBUG(log, "AST differs for table {}, local {}, in zookeeper {}", table_name, queryToString(local_ast_ptr), queryToString(zk_ast_ptr)); + } + else if (local_ast_ptr == nullptr) + { + LOG_DEBUG(log, "AST differs for table {}, local nullptr, in zookeeper {}", table_name, queryToString(zk_ast_ptr)); + } + else if (zk_ast_ptr == nullptr) + { + LOG_DEBUG(log, "AST differs for table {}, local {}, in zookeeper nullptr", table_name, queryToString(local_ast_ptr)); + } + else + { + LOG_DEBUG(log, "AST for table {} is the same in local and ZK", table_name); + } + } + else + { + if (local_ast_ptr == nullptr) + LOG_DEBUG(log, "Table {} exists locally, but missing in ZK", table_name); + else + LOG_DEBUG(log, "Table {} exists locally with AST {}, but missing in ZK", table_name, queryToString(local_ast_ptr)); + } + } + for (const auto & [table_name, table_metadata] : table_name_to_metadata_in_zk) + { + if (!checked_tables.contains(table_name)) + { + auto zk_ast_ptr = parseQueryFromMetadataInZooKeeper(table_name, table_metadata); + if (zk_ast_ptr == nullptr) + LOG_DEBUG(log, "Table {} exists in ZK with AST {}, but missing locally", table_name, queryToString(zk_ast_ptr)); + else + LOG_DEBUG(log, "Table {} exists in ZK, but missing locally", table_name); + } + } +} + +void DatabaseReplicated::checkTableEngine(const ASTCreateQuery & query, ASTStorage & storage, ContextPtr query_context) const +{ + bool replicated_table = storage.engine && + (startsWith(storage.engine->name, "Replicated") || startsWith(storage.engine->name, "Shared")); + if (!replicated_table || !storage.engine->arguments) + return; + + ASTs & args_ref = storage.engine->arguments->children; + ASTs args = args_ref; + if (args.size() < 2) + return; + + /// It can be a constant expression. Try to evaluate it, ignore exception if we cannot. + bool has_expression_argument = args_ref[0]->as() || args_ref[1]->as(); + if (has_expression_argument) + { + try + { + args[0] = evaluateConstantExpressionAsLiteral(args_ref[0]->clone(), query_context); + args[1] = evaluateConstantExpressionAsLiteral(args_ref[1]->clone(), query_context); + } + catch (...) // NOLINT(bugprone-empty-catch) + { + } + } + + ASTLiteral * arg1 = args[0]->as(); + ASTLiteral * arg2 = args[1]->as(); + if (!arg1 || !arg2 || arg1->value.getType() != Field::Types::String || arg2->value.getType() != Field::Types::String) + return; + + String maybe_path = arg1->value.get(); + String maybe_replica = arg2->value.get(); + + /// Looks like it's ReplicatedMergeTree with explicit zookeeper_path and replica_name arguments. + /// Let's ensure that some macros are used. + /// NOTE: we cannot check here that substituted values will be actually different on shards and replicas. + + Macros::MacroExpansionInfo info; + info.table_id = {getDatabaseName(), query.getTable(), query.uuid}; + info.shard = getShardName(); + info.replica = getReplicaName(); + query_context->getMacros()->expand(maybe_path, info); + bool maybe_shard_macros = info.expanded_other; + info.expanded_other = false; + query_context->getMacros()->expand(maybe_replica, info); + bool maybe_replica_macros = info.expanded_other; + bool enable_functional_tests_helper = getContext()->getConfigRef().has("_functional_tests_helper_database_replicated_replace_args_macros"); + + if (!enable_functional_tests_helper) + { + if (query_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments) + LOG_WARNING(log, "It's not recommended to explicitly specify zookeeper_path and replica_name in ReplicatedMergeTree arguments"); + else + throw Exception(ErrorCodes::INCORRECT_QUERY, + "It's not allowed to specify explicit zookeeper_path and replica_name " + "for ReplicatedMergeTree arguments in Replicated database. If you really want to " + "specify them explicitly, enable setting " + "database_replicated_allow_replicated_engine_arguments."); + } + + if (maybe_shard_macros && maybe_replica_macros) + return; + + if (enable_functional_tests_helper && !has_expression_argument) + { + if (maybe_path.empty() || maybe_path.back() != '/') + maybe_path += '/'; + args_ref[0]->as()->value = maybe_path + "auto_{shard}"; + args_ref[1]->as()->value = maybe_replica + "auto_{replica}"; + return; + } + + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Explicit zookeeper_path and replica_name are specified in ReplicatedMergeTree arguments. " + "If you really want to specify it explicitly, then you should use some macros " + "to distinguish different shards and replicas"); +} + bool DatabaseReplicated::checkDigestValid(const ContextPtr & local_context, bool debug_check /* = true */) const { if (debug_check) @@ -696,6 +875,13 @@ bool DatabaseReplicated::checkDigestValid(const ContextPtr & local_context, bool if (local_digest != tables_metadata_digest) { LOG_ERROR(log, "Digest of local metadata ({}) is not equal to in-memory digest ({})", local_digest, tables_metadata_digest); + +#ifndef NDEBUG + dumpLocalTablesForDebugOnly(local_context); + dumpTablesInZooKeeperForDebugOnly(); + tryCompareLocalAndZooKeeperTablesAndDumpDiffForDebugOnly(local_context); +#endif + return false; } @@ -712,6 +898,11 @@ bool DatabaseReplicated::checkDigestValid(const ContextPtr & local_context, bool if (zk_digest != local_digest_str) { LOG_ERROR(log, "Digest of local metadata ({}) is not equal to digest in Keeper ({})", local_digest_str, zk_digest); +#ifndef NDEBUG + dumpLocalTablesForDebugOnly(local_context); + dumpTablesInZooKeeperForDebugOnly(); + tryCompareLocalAndZooKeeperTablesAndDumpDiffForDebugOnly(local_context); +#endif return false; } @@ -729,81 +920,14 @@ void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_ if (auto * create = query->as()) { - bool replicated_table = create->storage && create->storage->engine && - (startsWith(create->storage->engine->name, "Replicated") || startsWith(create->storage->engine->name, "Shared")); - if (!replicated_table || !create->storage->engine->arguments) - return; + if (create->storage) + checkTableEngine(*create, *create->storage, query_context); - ASTs & args_ref = create->storage->engine->arguments->children; - ASTs args = args_ref; - if (args.size() < 2) - return; - - /// It can be a constant expression. Try to evaluate it, ignore exception if we cannot. - bool has_expression_argument = args_ref[0]->as() || args_ref[1]->as(); - if (has_expression_argument) + if (create->targets) { - try - { - args[0] = evaluateConstantExpressionAsLiteral(args_ref[0]->clone(), query_context); - args[1] = evaluateConstantExpressionAsLiteral(args_ref[1]->clone(), query_context); - } - catch (...) // NOLINT(bugprone-empty-catch) - { - } + for (const auto & inner_table_engine : create->targets->getInnerEngines()) + checkTableEngine(*create, *inner_table_engine, query_context); } - - ASTLiteral * arg1 = args[0]->as(); - ASTLiteral * arg2 = args[1]->as(); - if (!arg1 || !arg2 || arg1->value.getType() != Field::Types::String || arg2->value.getType() != Field::Types::String) - return; - - String maybe_path = arg1->value.get(); - String maybe_replica = arg2->value.get(); - - /// Looks like it's ReplicatedMergeTree with explicit zookeeper_path and replica_name arguments. - /// Let's ensure that some macros are used. - /// NOTE: we cannot check here that substituted values will be actually different on shards and replicas. - - Macros::MacroExpansionInfo info; - info.table_id = {getDatabaseName(), create->getTable(), create->uuid}; - info.shard = getShardName(); - info.replica = getReplicaName(); - query_context->getMacros()->expand(maybe_path, info); - bool maybe_shard_macros = info.expanded_other; - info.expanded_other = false; - query_context->getMacros()->expand(maybe_replica, info); - bool maybe_replica_macros = info.expanded_other; - bool enable_functional_tests_helper = getContext()->getConfigRef().has("_functional_tests_helper_database_replicated_replace_args_macros"); - - if (!enable_functional_tests_helper) - { - if (query_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments) - LOG_WARNING(log, "It's not recommended to explicitly specify zookeeper_path and replica_name in ReplicatedMergeTree arguments"); - else - throw Exception(ErrorCodes::INCORRECT_QUERY, - "It's not allowed to specify explicit zookeeper_path and replica_name " - "for ReplicatedMergeTree arguments in Replicated database. If you really want to " - "specify them explicitly, enable setting " - "database_replicated_allow_replicated_engine_arguments."); - } - - if (maybe_shard_macros && maybe_replica_macros) - return; - - if (enable_functional_tests_helper && !has_expression_argument) - { - if (maybe_path.empty() || maybe_path.back() != '/') - maybe_path += '/'; - args_ref[0]->as()->value = maybe_path + "auto_{shard}"; - args_ref[1]->as()->value = maybe_replica + "auto_{replica}"; - return; - } - - throw Exception(ErrorCodes::INCORRECT_QUERY, - "Explicit zookeeper_path and replica_name are specified in ReplicatedMergeTree arguments. " - "If you really want to specify it explicitly, then you should use some macros " - "to distinguish different shards and replicas"); } } @@ -1241,7 +1365,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep current_zookeeper->set(replica_path + "/digest", toString(tables_metadata_digest)); } -std::map DatabaseReplicated::tryGetConsistentMetadataSnapshot(const ZooKeeperPtr & zookeeper, UInt32 & max_log_ptr) +std::map DatabaseReplicated::tryGetConsistentMetadataSnapshot(const ZooKeeperPtr & zookeeper, UInt32 & max_log_ptr) const { return getConsistentMetadataSnapshotImpl(zookeeper, {}, /* max_retries= */ 10, max_log_ptr); } @@ -1302,7 +1426,7 @@ std::map DatabaseReplicated::getConsistentMetadataSnapshotImpl( return table_name_to_metadata; } -ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query) +ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query) const { ParserCreateQuery parser; String description = "in ZooKeeper " + zookeeper_path + "/metadata/" + node_name; @@ -1312,11 +1436,9 @@ ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node if (create.uuid == UUIDHelpers::Nil || create.getTable() != TABLE_WITH_UUID_NAME_PLACEHOLDER || create.database) throw Exception(ErrorCodes::LOGICAL_ERROR, "Got unexpected query from {}: {}", node_name, query); - bool is_materialized_view_with_inner_table = create.is_materialized_view && create.to_table_id.empty(); - create.setDatabase(getDatabaseName()); create.setTable(unescapeForFileName(node_name)); - create.attach = is_materialized_view_with_inner_table; + create.attach = create.is_materialized_view_with_inner_table(); return ast; } @@ -1401,6 +1523,7 @@ void DatabaseReplicated::renameDatabase(ContextPtr query_context, const String & void DatabaseReplicated::stopReplication() { + std::lock_guard lock{ddl_worker_mutex}; if (ddl_worker) ddl_worker->shutdown(); } @@ -1408,8 +1531,11 @@ void DatabaseReplicated::stopReplication() void DatabaseReplicated::shutdown() { stopReplication(); - ddl_worker_initialized = false; - ddl_worker = nullptr; + { + std::lock_guard lock{ddl_worker_mutex}; + ddl_worker_initialized = false; + ddl_worker = nullptr; + } DatabaseAtomic::shutdown(); } @@ -1557,6 +1683,7 @@ bool DatabaseReplicated::canExecuteReplicatedMetadataAlter() const /// It may update the metadata digest (both locally and in ZooKeeper) /// before DatabaseReplicatedDDLWorker::initializeReplication() has finished. /// We should not update metadata until the database is initialized. + std::lock_guard lock{ddl_worker_mutex}; return ddl_worker_initialized && ddl_worker->isCurrentlyActive(); } diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index eab5b2ff931..27ab262d1f1 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -107,15 +107,17 @@ private: void fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config); void checkQueryValid(const ASTPtr & query, ContextPtr query_context) const; + void checkTableEngine(const ASTCreateQuery & query, ASTStorage & storage, ContextPtr query_context) const; + void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 & max_log_ptr); - std::map tryGetConsistentMetadataSnapshot(const ZooKeeperPtr & zookeeper, UInt32 & max_log_ptr); + std::map tryGetConsistentMetadataSnapshot(const ZooKeeperPtr & zookeeper, UInt32 & max_log_ptr) const; std::map getConsistentMetadataSnapshotImpl(const ZooKeeperPtr & zookeeper, const FilterByNameFunction & filter_by_table_name, size_t max_retries, UInt32 & max_log_ptr) const; - ASTPtr parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query); + ASTPtr parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query) const; String readMetadataFile(const String & table_name) const; ClusterPtr getClusterImpl(bool all_groups = false) const; @@ -131,6 +133,11 @@ private: UInt64 getMetadataHash(const String & table_name) const; bool checkDigestValid(const ContextPtr & local_context, bool debug_check = true) const TSA_REQUIRES(metadata_mutex); + /// For debug purposes only, don't use in production code + void dumpLocalTablesForDebugOnly(const ContextPtr & local_context) const; + void dumpTablesInZooKeeperForDebugOnly() const; + void tryCompareLocalAndZooKeeperTablesAndDumpDiffForDebugOnly(const ContextPtr & local_context) const; + void waitDatabaseStarted() const override; void stopLoading() override; @@ -148,6 +155,7 @@ private: std::atomic_bool is_recovering = false; std::atomic_bool ddl_worker_initialized = false; std::unique_ptr ddl_worker; + mutable std::mutex ddl_worker_mutex; UInt32 max_log_ptr_at_creation = 0; /// Usually operation with metadata are single-threaded because of the way replication works, diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index fe0baf30e57..d8151cf340a 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -149,7 +149,7 @@ ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr & ast_ columns = metadata_ptr->columns.getAll(); for (const auto & column_name_and_type: columns) { - const auto & ast_column_declaration = std::make_shared(); + const auto ast_column_declaration = std::make_shared(); ast_column_declaration->name = column_name_and_type.name; /// parser typename { @@ -164,7 +164,7 @@ ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr & ast_ if (!parser.parse(pos, ast_type, expected)) { if (throw_on_error) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot parser metadata of {}.{}", + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot parse metadata of {}.{}", backQuote(table_id.database_name), backQuote(table_id.table_name)); else return nullptr; @@ -289,8 +289,11 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n tables.erase(it); table_storage->is_detached = true; - if (table_storage->isSystemStorage() == false) - CurrentMetrics::sub(getAttachedCounterForStorage(table_storage), 1); + if (!table_storage->isSystemStorage() && !DatabaseCatalog::isPredefinedDatabase(database_name)) + { + LOG_TEST(log, "Counting detached table {} to database {}", table_name, database_name); + CurrentMetrics::sub(getAttachedCounterForStorage(table_storage)); + } auto table_id = table_storage->getStorageID(); if (table_id.hasUUID()) @@ -334,8 +337,11 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c /// non-Atomic database the is_detached is set to true before RENAME. table->is_detached = false; - if (table->isSystemStorage() == false && table_id.database_name != DatabaseCatalog::SYSTEM_DATABASE) - CurrentMetrics::add(getAttachedCounterForStorage(table), 1); + if (!table->isSystemStorage() && !DatabaseCatalog::isPredefinedDatabase(database_name)) + { + LOG_TEST(log, "Counting attached table {} to database {}", table_name, database_name); + CurrentMetrics::add(getAttachedCounterForStorage(table)); + } } void DatabaseWithOwnTablesBase::shutdown() diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index a846e23cd4f..032fc33ea16 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -12,9 +12,9 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -25,6 +25,7 @@ #include #include + namespace fs = std::filesystem; namespace DB @@ -432,7 +433,7 @@ ASTPtr DatabasePostgreSQL::getCreateTableQueryImpl(const String & table_name, Co auto metadata_snapshot = storage->getInMemoryMetadataPtr(); for (const auto & column_type_and_name : metadata_snapshot->getColumns().getOrdinary()) { - const auto & column_declaration = std::make_shared(); + const auto column_declaration = std::make_shared(); column_declaration->name = column_type_and_name.name; column_declaration->type = getColumnDeclaration(column_type_and_name.type); columns_expression_list->children.emplace_back(column_declaration); @@ -470,17 +471,15 @@ ASTPtr DatabasePostgreSQL::getColumnDeclaration(const DataTypePtr & data_type) c WhichDataType which(data_type); if (which.isNullable()) - return makeASTFunction("Nullable", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType())); + return makeASTDataType("Nullable", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType())); if (which.isArray()) - return makeASTFunction("Array", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType())); + return makeASTDataType("Array", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType())); if (which.isDateTime64()) - { - return makeASTFunction("DateTime64", std::make_shared(static_cast(6))); - } + return makeASTDataType("DateTime64", std::make_shared(static_cast(6))); - return std::make_shared(data_type->getName()); + return makeASTDataType(data_type->getName()); } void registerDatabasePostgreSQL(DatabaseFactory & factory) diff --git a/src/Databases/SQLite/DatabaseSQLite.cpp b/src/Databases/SQLite/DatabaseSQLite.cpp index 132a978140c..471730fce29 100644 --- a/src/Databases/SQLite/DatabaseSQLite.cpp +++ b/src/Databases/SQLite/DatabaseSQLite.cpp @@ -154,6 +154,7 @@ StoragePtr DatabaseSQLite::fetchTable(const String & table_name, ContextPtr loca table_name, ColumnsDescription{*columns}, ConstraintsDescription{}, + /* comment = */ "", local_context); return storage; diff --git a/src/Disks/IO/AsynchronousBoundedReadBuffer.h b/src/Disks/IO/AsynchronousBoundedReadBuffer.h index 9a802348998..3dc8fcc39cb 100644 --- a/src/Disks/IO/AsynchronousBoundedReadBuffer.h +++ b/src/Disks/IO/AsynchronousBoundedReadBuffer.h @@ -34,7 +34,7 @@ public: String getFileName() const override { return impl->getFileName(); } - size_t getFileSize() override { return impl->getFileSize(); } + std::optional tryGetFileSize() override { return impl->tryGetFileSize(); } String getInfoForLog() override { return impl->getInfoForLog(); } diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index e9c642666d3..c928d25c7b8 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -59,7 +59,7 @@ CachedOnDiskReadBufferFromFile::CachedOnDiskReadBufferFromFile( std::optional read_until_position_, std::shared_ptr cache_log_) : ReadBufferFromFileBase(use_external_buffer_ ? 0 : settings_.remote_fs_buffer_size, nullptr, 0, file_size_) -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD , log(getLogger(fmt::format("CachedOnDiskReadBufferFromFile({})", cache_key_))) #else , log(getLogger("CachedOnDiskReadBufferFromFile")) @@ -452,7 +452,7 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegment & file_segme { case ReadType::CACHED: { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD size_t file_size = getFileSizeFromReadBuffer(*read_buffer_for_file_segment); if (file_size == 0 || range.left + file_size <= file_offset_of_buffer_end) throw Exception( @@ -810,6 +810,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() { last_caller_id = FileSegment::getCallerId(); + chassert(file_offset_of_buffer_end <= read_until_position); if (file_offset_of_buffer_end == read_until_position) return false; @@ -937,7 +938,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() if (!result) { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD if (read_type == ReadType::CACHED) { size_t cache_file_size = getFileSizeFromReadBuffer(*implementation_buffer); @@ -1051,7 +1052,11 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() if (download_current_segment && download_current_segment_succeeded) chassert(file_segment.getCurrentWriteOffset() >= file_offset_of_buffer_end); - chassert(file_offset_of_buffer_end <= read_until_position); + + chassert( + file_offset_of_buffer_end <= read_until_position, + fmt::format("Expected {} <= {} (size: {}, read range: {})", + file_offset_of_buffer_end, read_until_position, size, current_read_range.toString())); } swap(*implementation_buffer); diff --git a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp index da1ea65f2ea..ba864035777 100644 --- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp @@ -253,16 +253,15 @@ void ReadBufferFromAzureBlobStorage::initialize() initialized = true; } -size_t ReadBufferFromAzureBlobStorage::getFileSize() +std::optional ReadBufferFromAzureBlobStorage::tryGetFileSize() { if (!blob_client) blob_client = std::make_unique(blob_container_client->GetBlobClient(path)); - if (file_size.has_value()) - return *file_size; + if (!file_size) + file_size = blob_client->GetProperties().Value.BlobSize; - file_size = blob_client->GetProperties().Value.BlobSize; - return *file_size; + return file_size; } size_t ReadBufferFromAzureBlobStorage::readBigAt(char * to, size_t n, size_t range_begin, const std::function & /*progress_callback*/) const diff --git a/src/Disks/IO/ReadBufferFromAzureBlobStorage.h b/src/Disks/IO/ReadBufferFromAzureBlobStorage.h index d328195cc26..f407f27e099 100644 --- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.h @@ -42,7 +42,7 @@ public: bool supportsRightBoundedReads() const override { return true; } - size_t getFileSize() override; + std::optional tryGetFileSize() override; size_t readBigAt(char * to, size_t n, size_t range_begin, const std::function & progress_callback) const override; diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index e36365a8174..9f1cb681f1a 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -41,7 +41,7 @@ public: void setReadUntilEnd() override { setReadUntilPosition(getFileSize()); } - size_t getFileSize() override { return getTotalSize(blobs_to_read); } + std::optional tryGetFileSize() override { return getTotalSize(blobs_to_read); } size_t getFileOffsetOfBufferEnd() const override { return file_offset_of_buffer_end; } diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index e7c85bea1c6..b5805f6d23a 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -874,7 +874,9 @@ void DiskObjectStorageTransaction::writeFileUsingBlobWritingFunction( /// Create metadata (see create_metadata_callback in DiskObjectStorageTransaction::writeFile()). if (mode == WriteMode::Rewrite) { - if (!object_storage.isWriteOnce() && metadata_storage.exists(path)) + /// Otherwise we will produce lost blobs which nobody points to + /// WriteOnce storages are not affected by the issue + if (!object_storage.isPlain() && metadata_storage.exists(path)) object_storage.removeObjectsIfExist(metadata_storage.getStorageObjects(path)); metadata_transaction->createMetadataFile(path, std::move(object_key), object_size); diff --git a/src/Disks/TemporaryFileOnDisk.cpp b/src/Disks/TemporaryFileOnDisk.cpp index 91eb214d941..88674e068d9 100644 --- a/src/Disks/TemporaryFileOnDisk.cpp +++ b/src/Disks/TemporaryFileOnDisk.cpp @@ -58,7 +58,8 @@ TemporaryFileOnDisk::~TemporaryFileOnDisk() if (!disk->exists(relative_path)) { - LOG_WARNING(getLogger("TemporaryFileOnDisk"), "Temporary path '{}' does not exist in '{}'", relative_path, disk->getPath()); + if (show_warning_if_removed) + LOG_WARNING(getLogger("TemporaryFileOnDisk"), "Temporary path '{}' does not exist in '{}'", relative_path, disk->getPath()); return; } diff --git a/src/Disks/TemporaryFileOnDisk.h b/src/Disks/TemporaryFileOnDisk.h index cccfc82cf9e..d0ff44c6f03 100644 --- a/src/Disks/TemporaryFileOnDisk.h +++ b/src/Disks/TemporaryFileOnDisk.h @@ -27,12 +27,19 @@ public: /// Return relative path (without disk) const String & getRelativePath() const { return relative_path; } + /// Sets whether the destructor should show a warning if the temporary file has been already removed. + /// By default a warning is shown. + void setShowWarningIfRemoved(bool show_warning_if_removed_) { show_warning_if_removed = show_warning_if_removed_; } + private: DiskPtr disk; /// Relative path in disk to the temporary file or directory String relative_path; + /// Whether the destructor should show a warning if the temporary file has been already removed. + bool show_warning_if_removed = true; + CurrentMetrics::Increment metric_increment; /// Specified if we know what for file is used (sort/aggregate/join). diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 1271cdfb7ad..efd467abb37 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -150,7 +150,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.json.try_infer_objects_as_tuples = settings.input_format_json_try_infer_named_tuples_from_objects; format_settings.json.throw_on_bad_escape_sequence = settings.input_format_json_throw_on_bad_escape_sequence; format_settings.json.ignore_unnecessary_fields = settings.input_format_json_ignore_unnecessary_fields; - format_settings.json.ignore_key_case = settings.input_format_json_ignore_key_case; + format_settings.json.case_insensitive_column_matching = settings.input_format_json_case_insensitive_column_matching; format_settings.null_as_default = settings.input_format_null_as_default; format_settings.force_null_for_omitted_fields = settings.input_format_force_null_for_omitted_fields; format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; @@ -215,7 +215,6 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.tsv.allow_variable_number_of_columns = settings.input_format_tsv_allow_variable_number_of_columns; format_settings.tsv.crlf_end_of_line_input = settings.input_format_tsv_crlf_end_of_line; format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals; - format_settings.values.allow_data_after_semicolon = settings.input_format_values_allow_data_after_semicolon; format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions; format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions; format_settings.values.escape_quote_with_quote = settings.output_format_values_escape_quote_with_quote; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index a736bb1a558..72d1515b7e1 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -233,7 +233,7 @@ struct FormatSettings bool infer_incomplete_types_as_strings = true; bool throw_on_bad_escape_sequence = true; bool ignore_unnecessary_fields = true; - bool ignore_key_case = false; + bool case_insensitive_column_matching = false; } json{}; struct diff --git a/src/Functions/CRC.cpp b/src/Functions/CRC.cpp index 0ba976669a3..73318d523b5 100644 --- a/src/Functions/CRC.cpp +++ b/src/Functions/CRC.cpp @@ -81,46 +81,43 @@ struct CRCFunctionWrapper static constexpr auto is_fixed_to_constant = true; using ReturnType = typename Impl::ReturnType; - static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res) + static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res, size_t input_rows_count) { - size_t size = offsets.size(); - ColumnString::Offset prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { res[i] = doCRC(data, prev_offset, offsets[i] - prev_offset - 1); prev_offset = offsets[i]; } } - static void vectorFixedToConstant(const ColumnString::Chars & data, size_t n, ReturnType & res) { res = doCRC(data, 0, n); } - - static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res) + static void vectorFixedToConstant(const ColumnString::Chars & data, size_t n, ReturnType & res, size_t) { - size_t size = data.size() / n; - - for (size_t i = 0; i < size; ++i) - { - res[i] = doCRC(data, i * n, n); - } + res = doCRC(data, 0, n); } - [[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray & /*res*/) + static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res, size_t input_rows_count) + { + for (size_t i = 0; i < input_rows_count; ++i) + res[i] = doCRC(data, i * n, n); + } + + [[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to Array argument", std::string(Impl::name)); } - [[noreturn]] static void uuid(const ColumnUUID::Container & /*offsets*/, size_t /*n*/, PaddedPODArray & /*res*/) + [[noreturn]] static void uuid(const ColumnUUID::Container &, size_t, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to UUID argument", std::string(Impl::name)); } - [[noreturn]] static void ipv6(const ColumnIPv6::Container & /*offsets*/, size_t /*n*/, PaddedPODArray & /*res*/) + [[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to IPv6 argument", std::string(Impl::name)); } - [[noreturn]] static void ipv4(const ColumnIPv4::Container & /*offsets*/, size_t /*n*/, PaddedPODArray & /*res*/) + [[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to IPv4 argument", std::string(Impl::name)); } diff --git a/src/Functions/CustomWeekTransforms.h b/src/Functions/CustomWeekTransforms.h index 75fb3c32f16..5b4de765362 100644 --- a/src/Functions/CustomWeekTransforms.h +++ b/src/Functions/CustomWeekTransforms.h @@ -32,13 +32,12 @@ struct WeekTransformer {} template - void vector(const FromVectorType & vec_from, ToVectorType & vec_to, UInt8 week_mode, const DateLUTImpl & time_zone) const + void vector(const FromVectorType & vec_from, ToVectorType & vec_to, UInt8 week_mode, const DateLUTImpl & time_zone, size_t input_rows_count) const { using ValueType = typename ToVectorType::value_type; - size_t size = vec_from.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { if constexpr (is_extended_result) vec_to[i] = static_cast(transform.executeExtendedResult(vec_from[i], week_mode, time_zone)); @@ -56,7 +55,7 @@ template - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/, Transform transform = {}) + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count, Transform transform = {}) { const auto op = WeekTransformer{transform}; @@ -77,9 +76,9 @@ struct CustomWeekTransformImpl const auto * sources = checkAndGetColumn(source_col.get()); auto col_to = ToDataType::ColumnType::create(); - col_to->getData().resize(sources->size()); + col_to->getData().resize(input_rows_count); - for (size_t i = 0; i < sources->size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { DateTime64 dt64; ReadBufferFromString buf(sources->getDataAt(i).toView()); @@ -92,7 +91,7 @@ struct CustomWeekTransformImpl else if (const auto * sources = checkAndGetColumn(source_col.get())) { auto col_to = ToDataType::ColumnType::create(); - op.vector(sources->getData(), col_to->getData(), week_mode, time_zone); + op.vector(sources->getData(), col_to->getData(), week_mode, time_zone, input_rows_count); return col_to; } else diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 34c59ecab08..a7bd398cdaa 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -24,7 +24,7 @@ namespace DB static constexpr auto millisecond_multiplier = 1'000; static constexpr auto microsecond_multiplier = 1'000'000; -static constexpr auto nanosecond_multiplier = 1'000'000'000; +static constexpr auto nanosecond_multiplier = 1'000'000'000; static constexpr FormatSettings::DateTimeOverflowBehavior default_date_time_overflow_behavior = FormatSettings::DateTimeOverflowBehavior::Ignore; @@ -1954,7 +1954,10 @@ struct ToRelativeSubsecondNumImpl return t.value; if (scale > scale_multiplier) return t.value / (scale / scale_multiplier); - return t.value * (scale_multiplier / scale); + return static_cast(t.value) * static_cast((scale_multiplier / scale)); + /// Casting ^^: All integers are Int64, yet if t.value is big enough the multiplication can still + /// overflow which is UB. This place is too low-level and generic to check if t.value is sane. + /// Therefore just let it overflow safely and don't bother further. } static Int64 execute(UInt32 t, const DateLUTImpl &) { @@ -2131,13 +2134,12 @@ struct Transformer { template static void vector(const FromTypeVector & vec_from, ToTypeVector & vec_to, const DateLUTImpl & time_zone, const Transform & transform, - [[maybe_unused]] ColumnUInt8::Container * vec_null_map_to) + [[maybe_unused]] ColumnUInt8::Container * vec_null_map_to, size_t input_rows_count) { using ValueType = typename ToTypeVector::value_type; - size_t size = vec_from.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { if constexpr (std::is_same_v || std::is_same_v) { @@ -2175,7 +2177,7 @@ struct DateTimeTransformImpl { template static ColumnPtr execute( - const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/, const Transform & transform = {}) + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, const Transform & transform = {}) { using Op = Transformer; @@ -2197,7 +2199,7 @@ struct DateTimeTransformImpl if (result_data_type.isDateTime() || result_data_type.isDateTime64()) { const auto & time_zone = dynamic_cast(*result_type).getTimeZone(); - Op::vector(sources->getData(), col_to->getData(), time_zone, transform, vec_null_map_to); + Op::vector(sources->getData(), col_to->getData(), time_zone, transform, vec_null_map_to, input_rows_count); } else { @@ -2206,15 +2208,13 @@ struct DateTimeTransformImpl time_zone_argument_position = 2; const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, time_zone_argument_position, 0); - Op::vector(sources->getData(), col_to->getData(), time_zone, transform, vec_null_map_to); + Op::vector(sources->getData(), col_to->getData(), time_zone, transform, vec_null_map_to, input_rows_count); } if constexpr (std::is_same_v) { if (vec_null_map_to) - { return ColumnNullable::create(std::move(mutable_result_col), std::move(col_null_map_to)); - } } return mutable_result_col; diff --git a/src/Functions/EmptyImpl.h b/src/Functions/EmptyImpl.h index d3b2dda024b..03675254eb6 100644 --- a/src/Functions/EmptyImpl.h +++ b/src/Functions/EmptyImpl.h @@ -21,11 +21,10 @@ struct EmptyImpl /// If the function will return constant value for FixedString data type. static constexpr auto is_fixed_to_constant = false; - static void vector(const ColumnString::Chars & /*data*/, const ColumnString::Offsets & offsets, PaddedPODArray & res) + static void vector(const ColumnString::Chars & /*data*/, const ColumnString::Offsets & offsets, PaddedPODArray & res, size_t input_rows_count) { - size_t size = offsets.size(); ColumnString::Offset prev_offset = 1; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { res[i] = negative ^ (offsets[i] == prev_offset); prev_offset = offsets[i] + 1; @@ -33,42 +32,40 @@ struct EmptyImpl } /// Only make sense if is_fixed_to_constant. - static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, UInt8 & /*res*/) + static void vectorFixedToConstant(const ColumnString::Chars &, size_t, UInt8 &, size_t) { throw Exception(ErrorCodes::LOGICAL_ERROR, "'vectorFixedToConstant method' is called"); } - static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res) + static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res, size_t input_rows_count) { - size_t size = data.size() / n; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) res[i] = negative ^ memoryIsZeroSmallAllowOverflow15(data.data() + i * n, n); } - static void array(const ColumnString::Offsets & offsets, PaddedPODArray & res) + static void array(const ColumnString::Offsets & offsets, PaddedPODArray & res, size_t input_rows_count) { - size_t size = offsets.size(); ColumnString::Offset prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { res[i] = negative ^ (offsets[i] == prev_offset); prev_offset = offsets[i]; } } - static void uuid(const ColumnUUID::Container & container, size_t n, PaddedPODArray & res) + static void uuid(const ColumnUUID::Container & container, size_t n, PaddedPODArray & res, size_t) { for (size_t i = 0; i < n; ++i) res[i] = negative ^ (container[i].toUnderType() == 0); } - static void ipv6(const ColumnIPv6::Container & container, size_t n, PaddedPODArray & res) + static void ipv6(const ColumnIPv6::Container & container, size_t n, PaddedPODArray & res, size_t) { for (size_t i = 0; i < n; ++i) res[i] = negative ^ (container[i].toUnderType() == 0); } - static void ipv4(const ColumnIPv4::Container & container, size_t n, PaddedPODArray & res) + static void ipv4(const ColumnIPv4::Container & container, size_t n, PaddedPODArray & res, size_t) { for (size_t i = 0; i < n; ++i) res[i] = negative ^ (container[i].toUnderType() == 0); diff --git a/src/Functions/ExtractString.h b/src/Functions/ExtractString.h index 6beb8be830a..9f039f86b18 100644 --- a/src/Functions/ExtractString.h +++ b/src/Functions/ExtractString.h @@ -20,7 +20,7 @@ namespace DB // includes extracting ASCII ngram, UTF8 ngram, ASCII word and UTF8 word struct ExtractStringImpl { - static ALWAYS_INLINE inline const UInt8 * readOneWord(const UInt8 *& pos, const UInt8 * end) + static const UInt8 * readOneWord(const UInt8 *& pos, const UInt8 * end) { // jump separators while (pos < end && isUTF8Sep(*pos)) diff --git a/src/Functions/FunctionBase58Conversion.h b/src/Functions/FunctionBase58Conversion.h index e519f9768cc..a34a36d8b81 100644 --- a/src/Functions/FunctionBase58Conversion.h +++ b/src/Functions/FunctionBase58Conversion.h @@ -18,6 +18,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } struct Base58Encode @@ -135,7 +136,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.size() != 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong number of arguments for function {}: 1 expected.", getName()); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Wrong number of arguments for function {}: 1 expected.", getName()); if (!isString(arguments[0].type)) throw Exception( diff --git a/src/Functions/FunctionBitTestMany.h b/src/Functions/FunctionBitTestMany.h index 950e4ab4ea8..514b78ce59f 100644 --- a/src/Functions/FunctionBitTestMany.h +++ b/src/Functions/FunctionBitTestMany.h @@ -46,7 +46,7 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}", first_arg->getName(), getName()); - for (const auto i : collections::range(1, arguments.size())) + for (size_t i = 1; i < arguments.size(); ++i) { const auto & pos_arg = arguments[i]; @@ -57,19 +57,19 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { const auto * value_col = arguments.front().column.get(); ColumnPtr res; - if (!((res = execute(arguments, result_type, value_col)) - || (res = execute(arguments, result_type, value_col)) - || (res = execute(arguments, result_type, value_col)) - || (res = execute(arguments, result_type, value_col)) - || (res = execute(arguments, result_type, value_col)) - || (res = execute(arguments, result_type, value_col)) - || (res = execute(arguments, result_type, value_col)) - || (res = execute(arguments, result_type, value_col)))) + if (!((res = execute(arguments, result_type, value_col, input_rows_count)) + || (res = execute(arguments, result_type, value_col, input_rows_count)) + || (res = execute(arguments, result_type, value_col, input_rows_count)) + || (res = execute(arguments, result_type, value_col, input_rows_count)) + || (res = execute(arguments, result_type, value_col, input_rows_count)) + || (res = execute(arguments, result_type, value_col, input_rows_count)) + || (res = execute(arguments, result_type, value_col, input_rows_count)) + || (res = execute(arguments, result_type, value_col, input_rows_count)))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", value_col->getName(), getName()); return res; @@ -79,28 +79,28 @@ private: template ColumnPtr execute( const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, - const IColumn * const value_col_untyped) const + const IColumn * const value_col_untyped, + size_t input_rows_count) const { if (const auto value_col = checkAndGetColumn>(value_col_untyped)) { - const auto size = value_col->size(); bool is_const; const auto const_mask = createConstMaskIfConst(arguments, is_const); const auto & val = value_col->getData(); - auto out_col = ColumnVector::create(size); + auto out_col = ColumnVector::create(input_rows_count); auto & out = out_col->getData(); if (is_const) { - for (const auto i : collections::range(0, size)) + for (size_t i = 0; i < input_rows_count; ++i) out[i] = Impl::apply(val[i], const_mask); } else { - const auto mask = createMask(size, arguments); + const auto mask = createMask(input_rows_count, arguments); - for (const auto i : collections::range(0, size)) + for (size_t i = 0; i < input_rows_count; ++i) out[i] = Impl::apply(val[i], mask[i]); } @@ -108,23 +108,22 @@ private: } else if (const auto value_col_const = checkAndGetColumnConst>(value_col_untyped)) { - const auto size = value_col_const->size(); bool is_const; const auto const_mask = createConstMaskIfConst(arguments, is_const); const auto val = value_col_const->template getValue(); if (is_const) { - return result_type->createColumnConst(size, toField(Impl::apply(val, const_mask))); + return result_type->createColumnConst(input_rows_count, toField(Impl::apply(val, const_mask))); } else { - const auto mask = createMask(size, arguments); - auto out_col = ColumnVector::create(size); + const auto mask = createMask(input_rows_count, arguments); + auto out_col = ColumnVector::create(input_rows_count); auto & out = out_col->getData(); - for (const auto i : collections::range(0, size)) + for (size_t i = 0; i < input_rows_count; ++i) out[i] = Impl::apply(val, mask[i]); return out_col; @@ -140,7 +139,7 @@ private: out_is_const = true; ValueType mask = 0; - for (const auto i : collections::range(1, arguments.size())) + for (size_t i = 1; i < arguments.size(); ++i) { if (auto pos_col_const = checkAndGetColumnConst>(arguments[i].column.get())) { @@ -166,7 +165,7 @@ private: { PaddedPODArray mask(size, ValueType{}); - for (const auto i : collections::range(1, arguments.size())) + for (size_t i = 1; i < arguments.size(); ++i) { const auto * pos_col = arguments[i].column.get(); @@ -187,7 +186,7 @@ private: { const auto & pos = pos_col->getData(); - for (const auto i : collections::range(0, mask.size())) + for (size_t i = 0; i < mask.size(); ++i) if (pos[i] < 8 * sizeof(ValueType)) mask[i] = mask[i] | (ValueType(1) << pos[i]); else @@ -205,7 +204,7 @@ private: const auto new_mask = ValueType(1) << pos; - for (const auto i : collections::range(0, mask.size())) + for (size_t i = 0; i < mask.size(); ++i) mask[i] = mask[i] | new_mask; return true; diff --git a/src/Functions/FunctionChar.cpp b/src/Functions/FunctionChar.cpp index 0ebe1442f08..dbdb7692177 100644 --- a/src/Functions/FunctionChar.cpp +++ b/src/Functions/FunctionChar.cpp @@ -15,6 +15,7 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_COLUMN; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; } class FunctionChar : public IFunction @@ -36,7 +37,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.empty()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Number of arguments for function {} can't be {}, should be at least 1", getName(), arguments.size()); @@ -102,14 +103,11 @@ private: const ColumnVector * src_data_concrete = checkAndGetColumn>(&src_data); if (!src_data_concrete) - { return false; - } for (size_t row = 0; row < rows; ++row) - { out_vec[row * size_per_row + column_idx] = static_cast(src_data_concrete->getInt(row)); - } + return true; } }; diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index f50b1415622..25231b8887b 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -428,19 +428,17 @@ struct Processor {} template - void NO_INLINE vectorConstant(const FromColumnType & col_from, ToColumnType & col_to, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale) const + void NO_INLINE vectorConstant(const FromColumnType & col_from, ToColumnType & col_to, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale, size_t input_rows_count) const { static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC"); if constexpr (std::is_same_v) { - const auto & offsets_from = col_from.getOffsets(); auto & vec_to = col_to.getData(); - size_t size = offsets_from.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0 ; i < size; ++i) + for (size_t i = 0 ; i < input_rows_count; ++i) { std::string_view from = col_from.getDataAt(i).toView(); vec_to[i] = transform.execute(from, checkOverflow(delta), time_zone, utc_time_zone, scale); @@ -451,32 +449,31 @@ struct Processor const auto & vec_from = col_from.getData(); auto & vec_to = col_to.getData(); - size_t size = vec_from.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta), time_zone, utc_time_zone, scale); } } template - void vectorVector(const FromColumnType & col_from, ToColumnType & col_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const + void vectorVector(const FromColumnType & col_from, ToColumnType & col_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t input_rows_count) const { castTypeToEither< ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64, ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, ColumnFloat32, ColumnFloat64>( - &delta, [&](const auto & column){ vectorVector(col_from, col_to, column, time_zone, scale); return true; }); + &delta, [&](const auto & column){ vectorVector(col_from, col_to, column, time_zone, scale, input_rows_count); return true; }); } template - void constantVector(const FromType & from, ToColumnType & col_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const + void constantVector(const FromType & from, ToColumnType & col_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t input_rows_count) const { castTypeToEither< ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64, ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, ColumnFloat32, ColumnFloat64>( - &delta, [&](const auto & column){ constantVector(from, col_to, column, time_zone, scale); return true; }); + &delta, [&](const auto & column){ constantVector(from, col_to, column, time_zone, scale, input_rows_count); return true; }); } private: @@ -491,19 +488,17 @@ private: template NO_INLINE NO_SANITIZE_UNDEFINED void vectorVector( - const FromColumnType & col_from, ToColumnType & col_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale) const + const FromColumnType & col_from, ToColumnType & col_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t input_rows_count) const { static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC"); if constexpr (std::is_same_v) { - const auto & offsets_from = col_from.getOffsets(); auto & vec_to = col_to.getData(); - size_t size = offsets_from.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0 ; i < size; ++i) + for (size_t i = 0 ; i < input_rows_count; ++i) { std::string_view from = col_from.getDataAt(i).toView(); vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone, utc_time_zone, scale); @@ -514,26 +509,24 @@ private: const auto & vec_from = col_from.getData(); auto & vec_to = col_to.getData(); - size_t size = vec_from.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta.getData()[i]), time_zone, utc_time_zone, scale); } } template NO_INLINE NO_SANITIZE_UNDEFINED void constantVector( - const FromType & from, ToColumnType & col_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale) const + const FromType & from, ToColumnType & col_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t input_rows_count) const { static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC"); auto & vec_to = col_to.getData(); - size_t size = delta.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone, utc_time_zone, scale); } }; @@ -542,7 +535,7 @@ private: template struct DateTimeAddIntervalImpl { - static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, UInt16 scale) + static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, UInt16 scale, size_t input_rows_count) { using FromValueType = typename FromDataType::FieldType; using FromColumnType = typename FromDataType::ColumnType; @@ -561,15 +554,15 @@ struct DateTimeAddIntervalImpl if (const auto * sources = checkAndGetColumn(&source_column)) { if (const auto * delta_const_column = typeid_cast(&delta_column)) - processor.vectorConstant(*sources, *col_to, delta_const_column->getInt(0), time_zone, scale); + processor.vectorConstant(*sources, *col_to, delta_const_column->getInt(0), time_zone, scale, input_rows_count); else - processor.vectorVector(*sources, *col_to, delta_column, time_zone, scale); + processor.vectorVector(*sources, *col_to, delta_column, time_zone, scale, input_rows_count); } else if (const auto * sources_const = checkAndGetColumnConst(&source_column)) { processor.constantVector( sources_const->template getValue(), - *col_to, delta_column, time_zone, scale); + *col_to, delta_column, time_zone, scale, input_rows_count); } else { @@ -708,25 +701,25 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { const IDataType * from_type = arguments[0].type.get(); WhichDataType which(from_type); if (which.isDate()) - return DateTimeAddIntervalImpl, Transform>::execute(Transform{}, arguments, result_type, 0); + return DateTimeAddIntervalImpl, Transform>::execute(Transform{}, arguments, result_type, 0, input_rows_count); else if (which.isDate32()) - return DateTimeAddIntervalImpl, Transform>::execute(Transform{}, arguments, result_type, 0); + return DateTimeAddIntervalImpl, Transform>::execute(Transform{}, arguments, result_type, 0, input_rows_count); else if (which.isDateTime()) - return DateTimeAddIntervalImpl, Transform>::execute(Transform{}, arguments, result_type, 0); + return DateTimeAddIntervalImpl, Transform>::execute(Transform{}, arguments, result_type, 0, input_rows_count); else if (which.isDateTime64()) { const auto * datetime64_type = assert_cast(from_type); auto from_scale = datetime64_type->getScale(); - return DateTimeAddIntervalImpl, Transform>::execute(Transform{}, arguments, result_type, from_scale); + return DateTimeAddIntervalImpl, Transform>::execute(Transform{}, arguments, result_type, from_scale, input_rows_count); } else if (which.isString()) - return DateTimeAddIntervalImpl::execute(Transform{}, arguments, result_type, 3); + return DateTimeAddIntervalImpl::execute(Transform{}, arguments, result_type, 3, input_rows_count); else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}", arguments[0].type->getName(), getName()); } diff --git a/src/Functions/FunctionMathBinaryFloat64.h b/src/Functions/FunctionMathBinaryFloat64.h index 1b75ee688f4..82ff6ae7fbf 100644 --- a/src/Functions/FunctionMathBinaryFloat64.h +++ b/src/Functions/FunctionMathBinaryFloat64.h @@ -54,7 +54,7 @@ private: } template - static ColumnPtr executeTyped(const ColumnConst * left_arg, const IColumn * right_arg) + static ColumnPtr executeTyped(const ColumnConst * left_arg, const IColumn * right_arg, size_t input_rows_count) { if (const auto right_arg_typed = checkAndGetColumn>(right_arg)) { @@ -63,12 +63,11 @@ private: LeftType left_src_data[Impl::rows_per_iteration]; std::fill(std::begin(left_src_data), std::end(left_src_data), left_arg->template getValue()); const auto & right_src_data = right_arg_typed->getData(); - const auto src_size = right_src_data.size(); auto & dst_data = dst->getData(); - dst_data.resize(src_size); + dst_data.resize(input_rows_count); - const auto rows_remaining = src_size % Impl::rows_per_iteration; - const auto rows_size = src_size - rows_remaining; + const auto rows_remaining = input_rows_count % Impl::rows_per_iteration; + const auto rows_size = input_rows_count - rows_remaining; for (size_t i = 0; i < rows_size; i += Impl::rows_per_iteration) Impl::execute(left_src_data, &right_src_data[i], &dst_data[i]); @@ -92,7 +91,7 @@ private: } template - static ColumnPtr executeTyped(const ColumnVector * left_arg, const IColumn * right_arg) + static ColumnPtr executeTyped(const ColumnVector * left_arg, const IColumn * right_arg, size_t input_rows_count) { if (const auto right_arg_typed = checkAndGetColumn>(right_arg)) { @@ -100,12 +99,11 @@ private: const auto & left_src_data = left_arg->getData(); const auto & right_src_data = right_arg_typed->getData(); - const auto src_size = left_src_data.size(); auto & dst_data = dst->getData(); - dst_data.resize(src_size); + dst_data.resize(input_rows_count); - const auto rows_remaining = src_size % Impl::rows_per_iteration; - const auto rows_size = src_size - rows_remaining; + const auto rows_remaining = input_rows_count % Impl::rows_per_iteration; + const auto rows_size = input_rows_count - rows_remaining; for (size_t i = 0; i < rows_size; i += Impl::rows_per_iteration) Impl::execute(&left_src_data[i], &right_src_data[i], &dst_data[i]); @@ -136,12 +134,11 @@ private: const auto & left_src_data = left_arg->getData(); RightType right_src_data[Impl::rows_per_iteration]; std::fill(std::begin(right_src_data), std::end(right_src_data), right_arg_typed->template getValue()); - const auto src_size = left_src_data.size(); auto & dst_data = dst->getData(); - dst_data.resize(src_size); + dst_data.resize(input_rows_count); - const auto rows_remaining = src_size % Impl::rows_per_iteration; - const auto rows_size = src_size - rows_remaining; + const auto rows_remaining = input_rows_count % Impl::rows_per_iteration; + const auto rows_size = input_rows_count - rows_remaining; for (size_t i = 0; i < rows_size; i += Impl::rows_per_iteration) Impl::execute(&left_src_data[i], right_src_data, &dst_data[i]); @@ -165,7 +162,7 @@ private: return nullptr; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnWithTypeAndName & col_left = arguments[0]; const ColumnWithTypeAndName & col_right = arguments[1]; @@ -202,7 +199,7 @@ private: if (const auto left_arg_typed = checkAndGetColumn(left_arg)) { - if ((res = executeTyped(left_arg_typed, right_arg))) + if ((res = executeTyped(left_arg_typed, right_arg, input_rows_count))) return true; throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second argument of function {}", @@ -210,7 +207,7 @@ private: } if (const auto left_arg_typed = checkAndGetColumnConst(left_arg)) { - if ((res = executeTyped(left_arg_typed, right_arg))) + if ((res = executeTyped(left_arg_typed, right_arg, input_rows_count))) return true; throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second argument of function {}", diff --git a/src/Functions/FunctionMathUnary.h b/src/Functions/FunctionMathUnary.h index 9f400932356..3cc8bf391b4 100644 --- a/src/Functions/FunctionMathUnary.h +++ b/src/Functions/FunctionMathUnary.h @@ -106,42 +106,40 @@ private: } template - static ColumnPtr execute(const ColumnVector * col) + static ColumnPtr execute(const ColumnVector * col, size_t input_rows_count) { const auto & src_data = col->getData(); - const size_t size = src_data.size(); auto dst = ColumnVector::create(); auto & dst_data = dst->getData(); - dst_data.resize(size); + dst_data.resize(input_rows_count); - executeInIterations(src_data.data(), dst_data.data(), size); + executeInIterations(src_data.data(), dst_data.data(), input_rows_count); return dst; } template - static ColumnPtr execute(const ColumnDecimal * col) + static ColumnPtr execute(const ColumnDecimal * col, size_t input_rows_count) { const auto & src_data = col->getData(); - const size_t size = src_data.size(); UInt32 scale = col->getScale(); auto dst = ColumnVector::create(); auto & dst_data = dst->getData(); - dst_data.resize(size); + dst_data.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) dst_data[i] = DecimalUtils::convertTo(src_data[i], scale); - executeInIterations(dst_data.data(), dst_data.data(), size); + executeInIterations(dst_data.data(), dst_data.data(), input_rows_count); return dst; } bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnWithTypeAndName & col = arguments[0]; ColumnPtr res; @@ -156,7 +154,7 @@ private: const auto col_vec = checkAndGetColumn(col.column.get()); if (col_vec == nullptr) return false; - return (res = execute(col_vec)) != nullptr; + return (res = execute(col_vec, input_rows_count)) != nullptr; }; if (!callOnBasicType(col.type->getTypeId(), call)) diff --git a/src/Functions/FunctionNumericPredicate.h b/src/Functions/FunctionNumericPredicate.h index fd495d7e8e7..97a3639734b 100644 --- a/src/Functions/FunctionNumericPredicate.h +++ b/src/Functions/FunctionNumericPredicate.h @@ -53,39 +53,37 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const auto * in = arguments.front().column.get(); ColumnPtr res; - if (!((res = execute(in)) - || (res = execute(in)) - || (res = execute(in)) - || (res = execute(in)) - || (res = execute(in)) - || (res = execute(in)) - || (res = execute(in)) - || (res = execute(in)) - || (res = execute(in)) - || (res = execute(in)))) + if (!((res = execute(in, input_rows_count)) + || (res = execute(in, input_rows_count)) + || (res = execute(in, input_rows_count)) + || (res = execute(in, input_rows_count)) + || (res = execute(in, input_rows_count)) + || (res = execute(in, input_rows_count)) + || (res = execute(in, input_rows_count)) + || (res = execute(in, input_rows_count)) + || (res = execute(in, input_rows_count)) + || (res = execute(in, input_rows_count)))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", in->getName(), getName()); return res; } template - ColumnPtr execute(const IColumn * in_untyped) const + ColumnPtr execute(const IColumn * in_untyped, size_t input_rows_count) const { if (const auto in = checkAndGetColumn>(in_untyped)) { - const auto size = in->size(); - - auto out = ColumnUInt8::create(size); + auto out = ColumnUInt8::create(input_rows_count); const auto & in_data = in->getData(); auto & out_data = out->getData(); - for (const auto i : collections::range(0, size)) + for (size_t i = 0; i < input_rows_count; ++i) out_data[i] = Impl::execute(in_data[i]); return out; diff --git a/src/Functions/FunctionSpaceFillingCurve.h b/src/Functions/FunctionSpaceFillingCurve.h index ac9215f88e1..76f6678e847 100644 --- a/src/Functions/FunctionSpaceFillingCurve.h +++ b/src/Functions/FunctionSpaceFillingCurve.h @@ -132,9 +132,7 @@ public: } DataTypes types(tuple_size); for (size_t i = 0; i < tuple_size; i++) - { types[i] = std::make_shared(); - } return std::make_shared(types); } }; diff --git a/src/Functions/FunctionStringOrArrayToT.h b/src/Functions/FunctionStringOrArrayToT.h index 26c740f1fac..40f780d82a8 100644 --- a/src/Functions/FunctionStringOrArrayToT.h +++ b/src/Functions/FunctionStringOrArrayToT.h @@ -71,7 +71,7 @@ public: typename ColumnVector::Container & vec_res = col_res->getData(); vec_res.resize(col->size()); - Impl::vector(col->getChars(), col->getOffsets(), vec_res); + Impl::vector(col->getChars(), col->getOffsets(), vec_res, input_rows_count); return col_res; } @@ -80,7 +80,7 @@ public: if (Impl::is_fixed_to_constant) { ResultType res = 0; - Impl::vectorFixedToConstant(col_fixed->getChars(), col_fixed->getN(), res); + Impl::vectorFixedToConstant(col_fixed->getChars(), col_fixed->getN(), res, input_rows_count); return result_type->createColumnConst(col_fixed->size(), toField(res)); } @@ -90,7 +90,7 @@ public: typename ColumnVector::Container & vec_res = col_res->getData(); vec_res.resize(col_fixed->size()); - Impl::vectorFixedToVector(col_fixed->getChars(), col_fixed->getN(), vec_res); + Impl::vectorFixedToVector(col_fixed->getChars(), col_fixed->getN(), vec_res, input_rows_count); return col_res; } @@ -101,7 +101,7 @@ public: typename ColumnVector::Container & vec_res = col_res->getData(); vec_res.resize(col_arr->size()); - Impl::array(col_arr->getOffsets(), vec_res); + Impl::array(col_arr->getOffsets(), vec_res, input_rows_count); return col_res; } @@ -112,7 +112,7 @@ public: vec_res.resize(col_map->size()); const auto & col_nested = col_map->getNestedColumn(); - Impl::array(col_nested.getOffsets(), vec_res); + Impl::array(col_nested.getOffsets(), vec_res, input_rows_count); return col_res; } else if (const ColumnUUID * col_uuid = checkAndGetColumn(column.get())) @@ -120,7 +120,7 @@ public: auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_res = col_res->getData(); vec_res.resize(col_uuid->size()); - Impl::uuid(col_uuid->getData(), input_rows_count, vec_res); + Impl::uuid(col_uuid->getData(), input_rows_count, vec_res, input_rows_count); return col_res; } else if (const ColumnIPv6 * col_ipv6 = checkAndGetColumn(column.get())) @@ -128,7 +128,7 @@ public: auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_res = col_res->getData(); vec_res.resize(col_ipv6->size()); - Impl::ipv6(col_ipv6->getData(), input_rows_count, vec_res); + Impl::ipv6(col_ipv6->getData(), input_rows_count, vec_res, input_rows_count); return col_res; } else if (const ColumnIPv4 * col_ipv4 = checkAndGetColumn(column.get())) @@ -136,7 +136,7 @@ public: auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_res = col_res->getData(); vec_res.resize(col_ipv4->size()); - Impl::ipv4(col_ipv4->getData(), input_rows_count, vec_res); + Impl::ipv4(col_ipv4->getData(), input_rows_count, vec_res, input_rows_count); return col_res; } else diff --git a/src/Functions/FunctionStringReplace.h b/src/Functions/FunctionStringReplace.h index b4bcfa514a8..432e03bfe9e 100644 --- a/src/Functions/FunctionStringReplace.h +++ b/src/Functions/FunctionStringReplace.h @@ -45,7 +45,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { ColumnPtr column_haystack = arguments[0].column; column_haystack = column_haystack->convertToFullColumnIfConst(); @@ -70,7 +70,8 @@ public: col_haystack->getChars(), col_haystack->getOffsets(), col_needle_const->getValue(), col_replacement_const->getValue(), - col_res->getChars(), col_res->getOffsets()); + col_res->getChars(), col_res->getOffsets(), + input_rows_count); return col_res; } else if (col_haystack && col_needle_vector && col_replacement_const) @@ -79,7 +80,8 @@ public: col_haystack->getChars(), col_haystack->getOffsets(), col_needle_vector->getChars(), col_needle_vector->getOffsets(), col_replacement_const->getValue(), - col_res->getChars(), col_res->getOffsets()); + col_res->getChars(), col_res->getOffsets(), + input_rows_count); return col_res; } else if (col_haystack && col_needle_const && col_replacement_vector) @@ -88,7 +90,8 @@ public: col_haystack->getChars(), col_haystack->getOffsets(), col_needle_const->getValue(), col_replacement_vector->getChars(), col_replacement_vector->getOffsets(), - col_res->getChars(), col_res->getOffsets()); + col_res->getChars(), col_res->getOffsets(), + input_rows_count); return col_res; } else if (col_haystack && col_needle_vector && col_replacement_vector) @@ -97,7 +100,8 @@ public: col_haystack->getChars(), col_haystack->getOffsets(), col_needle_vector->getChars(), col_needle_vector->getOffsets(), col_replacement_vector->getChars(), col_replacement_vector->getOffsets(), - col_res->getChars(), col_res->getOffsets()); + col_res->getChars(), col_res->getOffsets(), + input_rows_count); return col_res; } else if (col_haystack_fixed && col_needle_const && col_replacement_const) @@ -106,7 +110,8 @@ public: col_haystack_fixed->getChars(), col_haystack_fixed->getN(), col_needle_const->getValue(), col_replacement_const->getValue(), - col_res->getChars(), col_res->getOffsets()); + col_res->getChars(), col_res->getOffsets(), + input_rows_count); return col_res; } else diff --git a/src/Functions/FunctionStringToString.h b/src/Functions/FunctionStringToString.h index 62d7b09f79e..e0e64e47b49 100644 --- a/src/Functions/FunctionStringToString.h +++ b/src/Functions/FunctionStringToString.h @@ -59,19 +59,19 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnPtr column = arguments[0].column; if (const ColumnString * col = checkAndGetColumn(column.get())) { auto col_res = ColumnString::create(); - Impl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets()); + Impl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), input_rows_count); return col_res; } else if (const ColumnFixedString * col_fixed = checkAndGetColumn(column.get())) { auto col_res = ColumnFixedString::create(col_fixed->getN()); - Impl::vectorFixed(col_fixed->getChars(), col_fixed->getN(), col_res->getChars()); + Impl::vectorFixed(col_fixed->getChars(), col_fixed->getN(), col_res->getChars(), input_rows_count); return col_res; } else diff --git a/src/Functions/FunctionTokens.h b/src/Functions/FunctionTokens.h index f1435ca5651..b6d8e9ee589 100644 --- a/src/Functions/FunctionTokens.h +++ b/src/Functions/FunctionTokens.h @@ -84,7 +84,7 @@ public: return std::make_shared(std::make_shared()); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { Generator generator; generator.init(arguments, max_substrings_includes_remaining_string); @@ -107,18 +107,17 @@ public: const ColumnString::Chars & src_chars = col_str->getChars(); const ColumnString::Offsets & src_offsets = col_str->getOffsets(); - res_offsets.reserve(src_offsets.size()); - res_strings_offsets.reserve(src_offsets.size() * 5); /// Constant 5 - at random. + res_offsets.reserve(input_rows_count); + res_strings_offsets.reserve(input_rows_count * 5); /// Constant 5 - at random. res_strings_chars.reserve(src_chars.size()); Pos token_begin = nullptr; Pos token_end = nullptr; - size_t size = src_offsets.size(); ColumnString::Offset current_src_offset = 0; ColumnArray::Offset current_dst_offset = 0; ColumnString::Offset current_dst_strings_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Pos pos = reinterpret_cast(&src_chars[current_src_offset]); current_src_offset = src_offsets[i]; diff --git a/src/Functions/FunctionsBinaryRepresentation.cpp b/src/Functions/FunctionsBinaryRepresentation.cpp index f77d2f1f350..b3861f0394d 100644 --- a/src/Functions/FunctionsBinaryRepresentation.cpp +++ b/src/Functions/FunctionsBinaryRepresentation.cpp @@ -632,7 +632,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnPtr & column = arguments[0].column; @@ -646,11 +646,10 @@ public: const ColumnString::Chars & in_vec = col->getChars(); const ColumnString::Offsets & in_offsets = col->getOffsets(); - size_t size = in_offsets.size(); - out_offsets.resize(size); + out_offsets.resize(input_rows_count); size_t max_out_len = 0; - for (size_t i = 0; i < in_offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const size_t len = in_offsets[i] - (i == 0 ? 0 : in_offsets[i - 1]) - /* trailing zero symbol that is always added in ColumnString and that is ignored while decoding */ 1; @@ -662,7 +661,7 @@ public: char * pos = begin; size_t prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t new_offset = in_offsets[i]; @@ -691,15 +690,14 @@ public: const ColumnString::Chars & in_vec = col_fix_string->getChars(); const size_t n = col_fix_string->getN(); - size_t size = col_fix_string->size(); - out_offsets.resize(size); - out_vec.resize(((n + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1) * size); + out_offsets.resize(input_rows_count); + out_vec.resize(((n + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1) * input_rows_count); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; size_t prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t new_offset = prev_offset + n; diff --git a/src/Functions/FunctionsBitToArray.cpp b/src/Functions/FunctionsBitToArray.cpp index beaaccad6db..0a14516268a 100644 --- a/src/Functions/FunctionsBitToArray.cpp +++ b/src/Functions/FunctionsBitToArray.cpp @@ -60,17 +60,17 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { ColumnPtr res; - if (!((res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)))) + if (!((res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", arguments[0].column->getName(), getName()); @@ -98,7 +98,7 @@ private: } template - ColumnPtr executeType(const ColumnsWithTypeAndName & columns) const + ColumnPtr executeType(const ColumnsWithTypeAndName & columns, size_t input_rows_count) const { if (const ColumnVector * col_from = checkAndGetColumn>(columns[0].column.get())) { @@ -107,13 +107,12 @@ private: const typename ColumnVector::Container & vec_from = col_from->getData(); ColumnString::Chars & data_to = col_to->getChars(); ColumnString::Offsets & offsets_to = col_to->getOffsets(); - size_t size = vec_from.size(); - data_to.resize(size * 2); - offsets_to.resize(size); + data_to.resize(input_rows_count * 2); + offsets_to.resize(input_rows_count); WriteBufferFromVector buf_to(data_to); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { writeBitmask(vec_from[i], buf_to); writeChar(0, buf_to); @@ -244,7 +243,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } template - ColumnPtr executeType(const IColumn * column) const + ColumnPtr executeType(const IColumn * column, size_t input_rows_count) const { const ColumnVector * col_from = checkAndGetColumn>(column); if (!col_from) @@ -257,13 +256,12 @@ public: auto & result_array_offsets_data = result_array_offsets->getData(); auto & vec_from = col_from->getData(); - size_t size = vec_from.size(); - result_array_offsets_data.resize(size); - result_array_values_data.reserve(size * 2); + result_array_offsets_data.resize(input_rows_count); + result_array_values_data.reserve(input_rows_count * 2); using UnsignedType = make_unsigned_t; - for (size_t row = 0; row < size; ++row) + for (size_t row = 0; row < input_rows_count; ++row) { UnsignedType x = static_cast(vec_from[row]); @@ -302,24 +300,24 @@ public: return result_column; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IColumn * in_column = arguments[0].column.get(); ColumnPtr result_column; - if (!((result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)))) + if (!((result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)))) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", diff --git a/src/Functions/FunctionsBitmap.h b/src/Functions/FunctionsBitmap.h index 92ec71a3118..12b2b1a662a 100644 --- a/src/Functions/FunctionsBitmap.h +++ b/src/Functions/FunctionsBitmap.h @@ -155,7 +155,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /* input_rows_count */) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IDataType * from_type = arguments[0].type.get(); const auto * array_type = typeid_cast(from_type); @@ -165,21 +165,21 @@ public: WhichDataType which(nested_type); if (which.isUInt8()) - return executeBitmapData(argument_types, arguments); + return executeBitmapData(argument_types, arguments, input_rows_count); else if (which.isUInt16()) - return executeBitmapData(argument_types, arguments); + return executeBitmapData(argument_types, arguments, input_rows_count); else if (which.isUInt32()) - return executeBitmapData(argument_types, arguments); + return executeBitmapData(argument_types, arguments, input_rows_count); else if (which.isUInt64()) - return executeBitmapData(argument_types, arguments); + return executeBitmapData(argument_types, arguments, input_rows_count); else if (which.isInt8()) - return executeBitmapData(argument_types, arguments); + return executeBitmapData(argument_types, arguments, input_rows_count); else if (which.isInt16()) - return executeBitmapData(argument_types, arguments); + return executeBitmapData(argument_types, arguments, input_rows_count); else if (which.isInt32()) - return executeBitmapData(argument_types, arguments); + return executeBitmapData(argument_types, arguments, input_rows_count); else if (which.isInt64()) - return executeBitmapData(argument_types, arguments); + return executeBitmapData(argument_types, arguments, input_rows_count); else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Unexpected type {} of argument of function {}", from_type->getName(), getName()); @@ -187,7 +187,7 @@ public: private: template - ColumnPtr executeBitmapData(DataTypes & argument_types, const ColumnsWithTypeAndName & arguments) const + ColumnPtr executeBitmapData(DataTypes & argument_types, const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { // input data const ColumnArray * array = typeid_cast(arguments[0].column.get()); @@ -203,10 +203,10 @@ private: AggregateFunctionPtr bitmap_function = AggregateFunctionFactory::instance().get( AggregateFunctionGroupBitmapData::name(), action, argument_types, params_row, properties); auto col_to = ColumnAggregateFunction::create(bitmap_function); - col_to->reserve(offsets.size()); + col_to->reserve(input_rows_count); size_t pos = 0; - for (size_t i = 0; i < offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { col_to->insertDefault(); AggregateFunctionGroupBitmapData & bitmap_data diff --git a/src/Functions/FunctionsCharsetClassification.cpp b/src/Functions/FunctionsCharsetClassification.cpp index 0a332ab70a9..f01ede64cc0 100644 --- a/src/Functions/FunctionsCharsetClassification.cpp +++ b/src/Functions/FunctionsCharsetClassification.cpp @@ -23,7 +23,7 @@ namespace constexpr size_t max_string_size = 1UL << 15; template - ALWAYS_INLINE inline Float64 naiveBayes( + Float64 naiveBayes( const FrequencyHolder::EncodingMap & standard, const ModelMap & model, Float64 max_result) @@ -51,7 +51,7 @@ namespace /// Count how many times each bigram occurs in the text. template - ALWAYS_INLINE inline void calculateStats( + void calculateStats( const UInt8 * data, const size_t size, ModelMap & model) @@ -77,24 +77,25 @@ struct CharsetClassificationImpl const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { const auto & encodings_freq = FrequencyHolder::getInstance().getEncodingsFrequency(); if constexpr (detect_language) /// 2 chars for ISO code + 1 zero byte - res_data.reserve(offsets.size() * 3); + res_data.reserve(input_rows_count * 3); else /// Mean charset length is 8 - res_data.reserve(offsets.size() * 8); + res_data.reserve(input_rows_count * 8); - res_offsets.resize(offsets.size()); + res_offsets.resize(input_rows_count); size_t current_result_offset = 0; double zero_frequency_log = log(zero_frequency); - for (size_t i = 0; i < offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const UInt8 * str = data.data() + offsets[i - 1]; const size_t str_len = offsets[i] - offsets[i - 1] - 1; diff --git a/src/Functions/FunctionsCodingIP.cpp b/src/Functions/FunctionsCodingIP.cpp index 0a97d029f84..0416df8f462 100644 --- a/src/Functions/FunctionsCodingIP.cpp +++ b/src/Functions/FunctionsCodingIP.cpp @@ -341,7 +341,7 @@ class FunctionIPv4NumToString : public IFunction { private: template - ColumnPtr executeTyped(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const + ColumnPtr executeTyped(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const { using ColumnType = ColumnVector; @@ -356,12 +356,12 @@ private: ColumnString::Chars & vec_res = col_res->getChars(); ColumnString::Offsets & offsets_res = col_res->getOffsets(); - vec_res.resize(vec_in.size() * (IPV4_MAX_TEXT_LENGTH + 1)); /// the longest value is: 255.255.255.255\0 - offsets_res.resize(vec_in.size()); + vec_res.resize(input_rows_count * (IPV4_MAX_TEXT_LENGTH + 1)); /// the longest value is: 255.255.255.255\0 + offsets_res.resize(input_rows_count); char * begin = reinterpret_cast(vec_res.data()); char * pos = begin; - for (size_t i = 0; i < vec_in.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { DB::formatIPv4(reinterpret_cast(&vec_in[i]), sizeof(ArgType), pos, mask_tail_octets, "xxx"); offsets_res[i] = pos - begin; @@ -532,7 +532,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const auto & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; @@ -542,11 +542,11 @@ public: auto col_res = ColumnIPv6::create(); auto & vec_res = col_res->getData(); - vec_res.resize(col_in->size()); + vec_res.resize(input_rows_count); const auto & vec_in = col_in->getData(); - for (size_t i = 0; i < vec_res.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) mapIPv4ToIPv6(vec_in[i], reinterpret_cast(&vec_res[i].toUnderType())); return col_res; @@ -557,7 +557,7 @@ public: auto col_res = ColumnFixedString::create(IPV6_BINARY_LENGTH); auto & vec_res = col_res->getChars(); - vec_res.resize(col_in->size() * IPV6_BINARY_LENGTH); + vec_res.resize(input_rows_count * IPV6_BINARY_LENGTH); const auto & vec_in = col_in->getData(); @@ -742,7 +742,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnPtr & column = arguments[0].column; @@ -751,13 +751,13 @@ public: auto col_res = ColumnUInt64::create(); ColumnUInt64::Container & vec_res = col_res->getData(); - vec_res.resize(col->size()); + vec_res.resize(input_rows_count); const ColumnString::Chars & vec_src = col->getChars(); const ColumnString::Offsets & offsets_src = col->getOffsets(); size_t prev_offset = 0; - for (size_t i = 0; i < vec_res.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t current_offset = offsets_src[i]; size_t string_size = current_offset - prev_offset - 1; /// mind the terminating zero byte @@ -1054,7 +1054,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnString * input_column = checkAndGetColumn(arguments[0].column.get()); @@ -1067,14 +1067,14 @@ public: auto col_res = ColumnUInt8::create(); ColumnUInt8::Container & vec_res = col_res->getData(); - vec_res.resize(input_column->size()); + vec_res.resize(input_rows_count); const ColumnString::Chars & vec_src = input_column->getChars(); const ColumnString::Offsets & offsets_src = input_column->getOffsets(); size_t prev_offset = 0; UInt32 result = 0; - for (size_t i = 0; i < vec_res.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { vec_res[i] = DB::parseIPv4whole(reinterpret_cast(&vec_src[prev_offset]), reinterpret_cast(&result)); prev_offset = offsets_src[i]; @@ -1110,7 +1110,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnString * input_column = checkAndGetColumn(arguments[0].column.get()); if (!input_column) @@ -1122,14 +1122,14 @@ public: auto col_res = ColumnUInt8::create(); ColumnUInt8::Container & vec_res = col_res->getData(); - vec_res.resize(input_column->size()); + vec_res.resize(input_rows_count); const ColumnString::Chars & vec_src = input_column->getChars(); const ColumnString::Offsets & offsets_src = input_column->getOffsets(); size_t prev_offset = 0; char buffer[IPV6_BINARY_LENGTH]; - for (size_t i = 0; i < vec_res.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { vec_res[i] = DB::parseIPv6whole(reinterpret_cast(&vec_src[prev_offset]), reinterpret_cast(&vec_src[offsets_src[i] - 1]), diff --git a/src/Functions/FunctionsCodingUUID.cpp b/src/Functions/FunctionsCodingUUID.cpp index 83fdcbc4af9..179ba1bf97a 100644 --- a/src/Functions/FunctionsCodingUUID.cpp +++ b/src/Functions/FunctionsCodingUUID.cpp @@ -177,7 +177,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnWithTypeAndName & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; @@ -189,21 +189,20 @@ public: "Illegal type {} of column {} argument of function {}, expected FixedString({})", col_type_name.type->getName(), col_in->getName(), getName(), uuid_bytes_length); - const auto size = col_in->size(); const auto & vec_in = col_in->getChars(); auto col_res = ColumnString::create(); ColumnString::Chars & vec_res = col_res->getChars(); ColumnString::Offsets & offsets_res = col_res->getOffsets(); - vec_res.resize(size * (uuid_text_length + 1)); - offsets_res.resize(size); + vec_res.resize(input_rows_count * (uuid_text_length + 1)); + offsets_res.resize(input_rows_count); size_t src_offset = 0; size_t dst_offset = 0; const UUIDSerializer uuid_serializer(variant); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { uuid_serializer.deserialize(&vec_in[src_offset], &vec_res[dst_offset]); src_offset += uuid_bytes_length; @@ -256,7 +255,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnWithTypeAndName & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; @@ -266,17 +265,16 @@ public: { const auto & vec_in = col_in->getChars(); const auto & offsets_in = col_in->getOffsets(); - const size_t size = offsets_in.size(); auto col_res = ColumnFixedString::create(uuid_bytes_length); ColumnString::Chars & vec_res = col_res->getChars(); - vec_res.resize(size * uuid_bytes_length); + vec_res.resize(input_rows_count * uuid_bytes_length); size_t src_offset = 0; size_t dst_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { /// If string has incorrect length - then return zero UUID. /// If string has correct length but contains something not like UUID - implementation specific behaviour. @@ -300,18 +298,17 @@ public: "Illegal type {} of column {} argument of function {}, expected FixedString({})", col_type_name.type->getName(), col_in_fixed->getName(), getName(), uuid_text_length); - const auto size = col_in_fixed->size(); const auto & vec_in = col_in_fixed->getChars(); auto col_res = ColumnFixedString::create(uuid_bytes_length); ColumnString::Chars & vec_res = col_res->getChars(); - vec_res.resize(size * uuid_bytes_length); + vec_res.resize(input_rows_count * uuid_bytes_length); size_t src_offset = 0; size_t dst_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { uuid_serializer.serialize(&vec_in[src_offset], &vec_res[dst_offset]); src_offset += uuid_text_length; @@ -359,7 +356,7 @@ public: return std::make_shared(uuid_bytes_length); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnWithTypeAndName & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; @@ -370,16 +367,15 @@ public: { const auto & vec_in = col_in->getData(); const UUID * uuids = vec_in.data(); - const size_t size = vec_in.size(); auto col_res = ColumnFixedString::create(uuid_bytes_length); ColumnString::Chars & vec_res = col_res->getChars(); - vec_res.resize(size * uuid_bytes_length); + vec_res.resize(input_rows_count * uuid_bytes_length); size_t dst_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { uint64_t hiBytes = DB::UUIDHelpers::getHighBytes(uuids[i]); uint64_t loBytes = DB::UUIDHelpers::getLowBytes(uuids[i]); @@ -448,7 +444,7 @@ public: return std::make_shared(datetime_scale, timezone); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnWithTypeAndName & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; @@ -457,12 +453,11 @@ public: { const auto & vec_in = col_in->getData(); const UUID * uuids = vec_in.data(); - const size_t size = vec_in.size(); - auto col_res = ColumnDateTime64::create(size, datetime_scale); + auto col_res = ColumnDateTime64::create(input_rows_count, datetime_scale); auto & vec_res = col_res->getData(); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const uint64_t hiBytes = DB::UUIDHelpers::getHighBytes(uuids[i]); const uint64_t ms = ((hiBytes & 0xf000) == 0x7000) ? (hiBytes >> 16) : 0; diff --git a/src/Functions/FunctionsDecimalArithmetics.h b/src/Functions/FunctionsDecimalArithmetics.h index e26ad7362b3..9b9045f7c69 100644 --- a/src/Functions/FunctionsDecimalArithmetics.h +++ b/src/Functions/FunctionsDecimalArithmetics.h @@ -151,36 +151,36 @@ struct Processor template void NO_INLINE vectorConstant(const FirstArgVectorType & vec_first, const SecondArgType second_value, - PaddedPODArray & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale) const + PaddedPODArray & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale, + size_t input_rows_count) const { - size_t size = vec_first.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) vec_to[i] = transform.execute(vec_first[i], second_value, scale_a, scale_b, result_scale); } template void NO_INLINE vectorVector(const FirstArgVectorType & vec_first, const SecondArgVectorType & vec_second, - PaddedPODArray & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale) const + PaddedPODArray & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale, + size_t input_rows_count) const { - size_t size = vec_first.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) vec_to[i] = transform.execute(vec_first[i], vec_second[i], scale_a, scale_b, result_scale); } template void NO_INLINE constantVector(const FirstArgType & first_value, const SecondArgVectorType & vec_second, - PaddedPODArray & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale) const + PaddedPODArray & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale, + size_t input_rows_count) const { - size_t size = vec_second.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) vec_to[i] = transform.execute(first_value, vec_second[i], scale_a, scale_b, result_scale); } }; @@ -189,7 +189,7 @@ struct Processor template struct DecimalArithmeticsImpl { - static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) + static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) { using FirstArgValueType = typename FirstArgType::FieldType; using FirstArgColumnType = typename FirstArgType::ColumnType; @@ -214,13 +214,13 @@ struct DecimalArithmeticsImpl if (first_col) { if (second_col_const) - op.vectorConstant(first_col->getData(), second_col_const->template getValue(), col_to->getData(), scale_a, scale_b, result_scale); + op.vectorConstant(first_col->getData(), second_col_const->template getValue(), col_to->getData(), scale_a, scale_b, result_scale, input_rows_count); else - op.vectorVector(first_col->getData(), second_col->getData(), col_to->getData(), scale_a, scale_b, result_scale); + op.vectorVector(first_col->getData(), second_col->getData(), col_to->getData(), scale_a, scale_b, result_scale, input_rows_count); } else if (first_col_const) { - op.constantVector(first_col_const->template getValue(), second_col->getData(), col_to->getData(), scale_a, scale_b, result_scale); + op.constantVector(first_col_const->template getValue(), second_col->getData(), col_to->getData(), scale_a, scale_b, result_scale, input_rows_count); } else { @@ -293,14 +293,14 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { - return resolveOverload(arguments, result_type); + return resolveOverload(arguments, result_type, input_rows_count); } private: // long resolver to call proper templated func - ColumnPtr resolveOverload(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const + ColumnPtr resolveOverload(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const { WhichDataType which_dividend(arguments[0].type.get()); WhichDataType which_divisor(arguments[1].type.get()); @@ -309,26 +309,26 @@ private: { using DividendType = DataTypeDecimal32; if (which_divisor.isDecimal32()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal64()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal128()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal256()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); } else if (which_dividend.isDecimal64()) { using DividendType = DataTypeDecimal64; if (which_divisor.isDecimal32()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal64()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal128()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal256()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); } @@ -336,13 +336,13 @@ private: { using DividendType = DataTypeDecimal128; if (which_divisor.isDecimal32()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal64()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal128()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal256()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); } @@ -350,13 +350,13 @@ private: { using DividendType = DataTypeDecimal256; if (which_divisor.isDecimal32()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal64()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal128()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal256()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); } diff --git a/src/Functions/FunctionsEmbeddedDictionaries.h b/src/Functions/FunctionsEmbeddedDictionaries.h index 2f270bf999a..f27934ce5a9 100644 --- a/src/Functions/FunctionsEmbeddedDictionaries.h +++ b/src/Functions/FunctionsEmbeddedDictionaries.h @@ -181,7 +181,7 @@ public: bool isDeterministic() const override { return false; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { /// The dictionary key that defines the "point of view". std::string dict_key; @@ -205,10 +205,9 @@ public: const typename ColumnVector::Container & vec_from = col_from->getData(); typename ColumnVector::Container & vec_to = col_to->getData(); - size_t size = vec_from.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) vec_to[i] = Transform::apply(vec_from[i], dict); return col_to; @@ -273,7 +272,7 @@ public: bool isDeterministic() const override { return false; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { /// The dictionary key that defines the "point of view". std::string dict_key; @@ -303,10 +302,9 @@ public: const typename ColumnVector::Container & vec_from1 = col_vec1->getData(); const typename ColumnVector::Container & vec_from2 = col_vec2->getData(); typename ColumnUInt8::Container & vec_to = col_to->getData(); - size_t size = vec_from1.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) vec_to[i] = Transform::apply(vec_from1[i], vec_from2[i], dict); return col_to; @@ -318,10 +316,9 @@ public: const typename ColumnVector::Container & vec_from1 = col_vec1->getData(); const T const_from2 = col_const2->template getValue(); typename ColumnUInt8::Container & vec_to = col_to->getData(); - size_t size = vec_from1.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) vec_to[i] = Transform::apply(vec_from1[i], const_from2, dict); return col_to; @@ -333,10 +330,9 @@ public: const T const_from1 = col_const1->template getValue(); const typename ColumnVector::Container & vec_from2 = col_vec2->getData(); typename ColumnUInt8::Container & vec_to = col_to->getData(); - size_t size = vec_from2.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) vec_to[i] = Transform::apply(const_from1, vec_from2[i], dict); return col_to; @@ -405,7 +401,7 @@ public: bool isDeterministic() const override { return false; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { /// The dictionary key that defines the "point of view". std::string dict_key; @@ -432,11 +428,10 @@ public: auto & res_values = col_values->getData(); const typename ColumnVector::Container & vec_from = col_from->getData(); - size_t size = vec_from.size(); - res_offsets.resize(size); - res_values.reserve(size * 4); + res_offsets.resize(input_rows_count); + res_values.reserve(input_rows_count * 4); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { T cur = vec_from[i]; for (size_t depth = 0; cur && depth < DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH; ++depth) diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index 47040545677..c35df8ba72d 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -125,7 +125,7 @@ public: } String error; - for (const auto i : collections::range(0, input_rows_count)) + for (size_t i = 0; i < input_rows_count; ++i) { if (!col_json_const) { @@ -314,7 +314,7 @@ private: static size_t calculateMaxSize(const ColumnString::Offsets & offsets) { size_t max_size = 0; - for (const auto i : collections::range(0, offsets.size())) + for (size_t i = 0; i < offsets.size(); ++i) { size_t size = offsets[i] - offsets[i - 1]; max_size = std::max(max_size, size); @@ -739,7 +739,8 @@ public: { NumberType value; - tryGetNumericValueFromJSONElement(value, element, convert_bool_to_integer, error); + if (!tryGetNumericValueFromJSONElement(value, element, convert_bool_to_integer, error)) + return false; auto & col_vec = assert_cast &>(dest); col_vec.insertValue(value); return true; diff --git a/src/Functions/FunctionsLanguageClassification.cpp b/src/Functions/FunctionsLanguageClassification.cpp index 55485d41ce0..410eea0f437 100644 --- a/src/Functions/FunctionsLanguageClassification.cpp +++ b/src/Functions/FunctionsLanguageClassification.cpp @@ -31,7 +31,7 @@ extern const int SUPPORT_IS_DISABLED; struct FunctionDetectLanguageImpl { - static ALWAYS_INLINE inline std::string_view codeISO(std::string_view code_string) + static std::string_view codeISO(std::string_view code_string) { if (code_string.ends_with("-Latn")) code_string.remove_suffix(code_string.size() - 5); @@ -63,16 +63,17 @@ struct FunctionDetectLanguageImpl const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { /// Constant 3 is based on the fact that in general we need 2 characters for ISO code + 1 zero byte - res_data.reserve(offsets.size() * 3); - res_offsets.resize(offsets.size()); + res_data.reserve(input_rows_count * 3); + res_offsets.resize(input_rows_count); bool is_reliable; size_t res_offset = 0; - for (size_t i = 0; i < offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const UInt8 * str = data.data() + offsets[i - 1]; const size_t str_len = offsets[i] - offsets[i - 1] - 1; diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index e1814150da6..65d7473b945 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -48,7 +48,7 @@ using UInt8Container = ColumnUInt8::Container; using UInt8ColumnPtrs = std::vector; -MutableColumnPtr buildColumnFromTernaryData(const UInt8Container & ternary_data, const bool make_nullable) +MutableColumnPtr buildColumnFromTernaryData(const UInt8Container & ternary_data, bool make_nullable) { const size_t rows_count = ternary_data.size(); diff --git a/src/Functions/FunctionsProgrammingClassification.cpp b/src/Functions/FunctionsProgrammingClassification.cpp index c01e47ad0d7..bbedef024d5 100644 --- a/src/Functions/FunctionsProgrammingClassification.cpp +++ b/src/Functions/FunctionsProgrammingClassification.cpp @@ -40,17 +40,18 @@ struct FunctionDetectProgrammingLanguageImpl const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { const auto & programming_freq = FrequencyHolder::getInstance().getProgrammingFrequency(); /// Constant 5 is arbitrary - res_data.reserve(offsets.size() * 5); - res_offsets.resize(offsets.size()); + res_data.reserve(input_rows_count * 5); + res_offsets.resize(input_rows_count); size_t res_offset = 0; - for (size_t i = 0; i < offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const UInt8 * str = data.data() + offsets[i - 1]; const size_t str_len = offsets[i] - offsets[i - 1] - 1; diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index 36448c6f689..83075ca01cb 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -80,8 +80,7 @@ public: auto col_to = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_to->getData(); - size_t size = input_rows_count; - vec_to.resize(size); + vec_to.resize(input_rows_count); RandImpl::execute(reinterpret_cast(vec_to.data()), vec_to.size() * sizeof(ToType)); return col_to; diff --git a/src/Functions/FunctionsStringDistance.cpp b/src/Functions/FunctionsStringDistance.cpp index 48f4aaf4e09..5aae92a8141 100644 --- a/src/Functions/FunctionsStringDistance.cpp +++ b/src/Functions/FunctionsStringDistance.cpp @@ -37,12 +37,12 @@ struct FunctionStringDistanceImpl const ColumnString::Offsets & haystack_offsets, const ColumnString::Chars & needle_data, const ColumnString::Offsets & needle_offsets, - PaddedPODArray & res) + PaddedPODArray & res, + size_t input_rows_count) { - size_t size = res.size(); const char * haystack = reinterpret_cast(haystack_data.data()); const char * needle = reinterpret_cast(needle_data.data()); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { res[i] = Op::process( haystack + haystack_offsets[i - 1], @@ -56,13 +56,13 @@ struct FunctionStringDistanceImpl const String & haystack, const ColumnString::Chars & needle_data, const ColumnString::Offsets & needle_offsets, - PaddedPODArray & res) + PaddedPODArray & res, + size_t input_rows_count) { const char * haystack_data = haystack.data(); size_t haystack_size = haystack.size(); const char * needle = reinterpret_cast(needle_data.data()); - size_t size = res.size(); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { res[i] = Op::process(haystack_data, haystack_size, needle + needle_offsets[i - 1], needle_offsets[i] - needle_offsets[i - 1] - 1); @@ -73,9 +73,10 @@ struct FunctionStringDistanceImpl const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const String & needle, - PaddedPODArray & res) + PaddedPODArray & res, + size_t input_rows_count) { - constantVector(needle, data, offsets, res); + constantVector(needle, data, offsets, res, input_rows_count); } }; diff --git a/src/Functions/FunctionsStringHash.cpp b/src/Functions/FunctionsStringHash.cpp index 0bf6e39e651..bd7d45d781a 100644 --- a/src/Functions/FunctionsStringHash.cpp +++ b/src/Functions/FunctionsStringHash.cpp @@ -315,9 +315,9 @@ struct SimHashImpl return getSimHash(finger_vec); } - static void apply(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, size_t shingle_size, PaddedPODArray & res) + static void apply(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, size_t shingle_size, PaddedPODArray & res, size_t input_rows_count) { - for (size_t i = 0; i < offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const UInt8 * one_data = &data[offsets[i - 1]]; const size_t data_size = offsets[i] - offsets[i - 1] - 1; @@ -543,12 +543,13 @@ struct MinHashImpl PaddedPODArray * res1, PaddedPODArray * res2, ColumnTuple * res1_strings, - ColumnTuple * res2_strings) + ColumnTuple * res2_strings, + size_t input_rows_count) { MinHeap min_heap; MaxHeap max_heap; - for (size_t i = 0; i < offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const UInt8 * one_data = &data[offsets[i - 1]]; const size_t data_size = offsets[i] - offsets[i - 1] - 1; diff --git a/src/Functions/FunctionsStringHash.h b/src/Functions/FunctionsStringHash.h index fcd4c970a47..f790c660e21 100644 --- a/src/Functions/FunctionsStringHash.h +++ b/src/Functions/FunctionsStringHash.h @@ -135,7 +135,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnPtr & column = arguments[0].column; @@ -152,9 +152,9 @@ public: { auto col_res = ColumnVector::create(); auto & vec_res = col_res->getData(); - vec_res.resize(column->size()); + vec_res.resize(input_rows_count); const ColumnString & col_str_vector = checkAndGetColumn(*column); - Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, vec_res); + Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, vec_res, input_rows_count); return col_res; } else if constexpr (is_arg) // Min hash arg @@ -171,7 +171,7 @@ public: auto max_tuple = ColumnTuple::create(std::move(max_columns)); const ColumnString & col_str_vector = checkAndGetColumn(*column); - Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, num_hashes, nullptr, nullptr, min_tuple.get(), max_tuple.get()); + Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, num_hashes, nullptr, nullptr, min_tuple.get(), max_tuple.get(), input_rows_count); MutableColumns tuple_columns; tuple_columns.emplace_back(std::move(min_tuple)); @@ -184,10 +184,10 @@ public: auto col_h2 = ColumnVector::create(); auto & vec_h1 = col_h1->getData(); auto & vec_h2 = col_h2->getData(); - vec_h1.resize(column->size()); - vec_h2.resize(column->size()); + vec_h1.resize(input_rows_count); + vec_h2.resize(input_rows_count); const ColumnString & col_str_vector = checkAndGetColumn(*column); - Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, num_hashes, &vec_h1, &vec_h2, nullptr, nullptr); + Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, num_hashes, &vec_h1, &vec_h2, nullptr, nullptr, input_rows_count); MutableColumns tuple_columns; tuple_columns.emplace_back(std::move(col_h1)); tuple_columns.emplace_back(std::move(col_h2)); diff --git a/src/Functions/FunctionsStringHashFixedString.cpp b/src/Functions/FunctionsStringHashFixedString.cpp index 01e989a7f2c..9474fe2629e 100644 --- a/src/Functions/FunctionsStringHashFixedString.cpp +++ b/src/Functions/FunctionsStringHashFixedString.cpp @@ -224,7 +224,7 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { if (const ColumnString * col_from = checkAndGetColumn(arguments[0].column.get())) { @@ -233,11 +233,10 @@ public: const typename ColumnString::Chars & data = col_from->getChars(); const typename ColumnString::Offsets & offsets = col_from->getOffsets(); auto & chars_to = col_to->getChars(); - const auto size = offsets.size(); - chars_to.resize(size * Impl::length); + chars_to.resize(input_rows_count * Impl::length); ColumnString::Offset current_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Impl::apply( reinterpret_cast(&data[current_offset]), @@ -253,11 +252,10 @@ public: { auto col_to = ColumnFixedString::create(Impl::length); const typename ColumnFixedString::Chars & data = col_from_fix->getChars(); - const auto size = col_from_fix->size(); auto & chars_to = col_to->getChars(); const auto length = col_from_fix->getN(); - chars_to.resize(size * Impl::length); - for (size_t i = 0; i < size; ++i) + chars_to.resize(input_rows_count * Impl::length); + for (size_t i = 0; i < input_rows_count; ++i) { Impl::apply( reinterpret_cast(&data[i * length]), length, reinterpret_cast(&chars_to[i * Impl::length])); @@ -268,11 +266,10 @@ public: { auto col_to = ColumnFixedString::create(Impl::length); const typename ColumnIPv6::Container & data = col_from_ip->getData(); - const auto size = col_from_ip->size(); auto & chars_to = col_to->getChars(); const auto length = sizeof(IPv6::UnderlyingType); - chars_to.resize(size * Impl::length); - for (size_t i = 0; i < size; ++i) + chars_to.resize(input_rows_count * Impl::length); + for (size_t i = 0; i < input_rows_count; ++i) { Impl::apply( reinterpret_cast(&data[i]), length, reinterpret_cast(&chars_to[i * Impl::length])); diff --git a/src/Functions/FunctionsStringSimilarity.cpp b/src/Functions/FunctionsStringSimilarity.cpp index 7b3f2337c89..5e26e4ad482 100644 --- a/src/Functions/FunctionsStringSimilarity.cpp +++ b/src/Functions/FunctionsStringSimilarity.cpp @@ -90,7 +90,7 @@ struct NgramDistanceImpl ((cont[Offset + I] = std::tolower(cont[Offset + I])), ...); } - static ALWAYS_INLINE size_t readASCIICodePoints(CodePoint * code_points, const char *& pos, const char * end) + static size_t readASCIICodePoints(CodePoint * code_points, const char *& pos, const char * end) { /// Offset before which we copy some data. constexpr size_t padding_offset = default_padding - N + 1; @@ -120,7 +120,7 @@ struct NgramDistanceImpl return default_padding; } - static ALWAYS_INLINE size_t readUTF8CodePoints(CodePoint * code_points, const char *& pos, const char * end) + static size_t readUTF8CodePoints(CodePoint * code_points, const char *& pos, const char * end) { /// The same copying as described in the function above. memcpy(code_points, code_points + default_padding - N + 1, roundUpToPowerOfTwoOrZero(N - 1) * sizeof(CodePoint)); @@ -195,7 +195,7 @@ struct NgramDistanceImpl } template - static ALWAYS_INLINE inline size_t calculateNeedleStats( + static inline size_t calculateNeedleStats( const char * data, const size_t size, NgramCount * ngram_stats, @@ -228,7 +228,7 @@ struct NgramDistanceImpl } template - static ALWAYS_INLINE inline UInt64 calculateHaystackStatsAndMetric( + static inline UInt64 calculateHaystackStatsAndMetric( const char * data, const size_t size, NgramCount * ngram_stats, @@ -318,9 +318,9 @@ struct NgramDistanceImpl const ColumnString::Offsets & haystack_offsets, const ColumnString::Chars & needle_data, const ColumnString::Offsets & needle_offsets, - PaddedPODArray & res) + PaddedPODArray & res, + size_t input_rows_count) { - const size_t haystack_offsets_size = haystack_offsets.size(); size_t prev_haystack_offset = 0; size_t prev_needle_offset = 0; @@ -331,7 +331,7 @@ struct NgramDistanceImpl std::unique_ptr needle_ngram_storage(new UInt16[max_string_size]); std::unique_ptr haystack_ngram_storage(new UInt16[max_string_size]); - for (size_t i = 0; i < haystack_offsets_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const char * haystack = reinterpret_cast(&haystack_data[prev_haystack_offset]); const size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1; @@ -391,12 +391,13 @@ struct NgramDistanceImpl std::string haystack, const ColumnString::Chars & needle_data, const ColumnString::Offsets & needle_offsets, - PaddedPODArray & res) + PaddedPODArray & res, + size_t input_rows_count) { /// For symmetric version it is better to use vector_constant if constexpr (symmetric) { - vectorConstant(needle_data, needle_offsets, std::move(haystack), res); + vectorConstant(needle_data, needle_offsets, std::move(haystack), res, input_rows_count); } else { @@ -404,7 +405,6 @@ struct NgramDistanceImpl haystack.resize(haystack_size + default_padding); /// For logic explanation see vector_vector function. - const size_t needle_offsets_size = needle_offsets.size(); size_t prev_offset = 0; std::unique_ptr common_stats{new NgramCount[map_size]{}}; @@ -412,7 +412,7 @@ struct NgramDistanceImpl std::unique_ptr needle_ngram_storage(new UInt16[max_string_size]); std::unique_ptr haystack_ngram_storage(new UInt16[max_string_size]); - for (size_t i = 0; i < needle_offsets_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const char * needle = reinterpret_cast(&needle_data[prev_offset]); const size_t needle_size = needle_offsets[i] - prev_offset - 1; @@ -456,7 +456,8 @@ struct NgramDistanceImpl const ColumnString::Chars & data, const ColumnString::Offsets & offsets, std::string needle, - PaddedPODArray & res) + PaddedPODArray & res, + size_t input_rows_count) { /// zeroing our map std::unique_ptr common_stats{new NgramCount[map_size]{}}; @@ -472,7 +473,7 @@ struct NgramDistanceImpl size_t distance = needle_stats_size; size_t prev_offset = 0; - for (size_t i = 0; i < offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const UInt8 * haystack = &data[prev_offset]; const size_t haystack_size = offsets[i] - prev_offset - 1; diff --git a/src/Functions/FunctionsStringSimilarity.h b/src/Functions/FunctionsStringSimilarity.h index e148730054d..c2cf7137286 100644 --- a/src/Functions/FunctionsStringSimilarity.h +++ b/src/Functions/FunctionsStringSimilarity.h @@ -57,7 +57,7 @@ public: return std::make_shared>(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { using ResultType = typename Impl::ResultType; @@ -90,7 +90,7 @@ public: auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_res = col_res->getData(); - vec_res.resize(column_haystack->size()); + vec_res.resize(input_rows_count); const ColumnString * col_haystack_vector = checkAndGetColumn(&*column_haystack); const ColumnString * col_needle_vector = checkAndGetColumn(&*column_needle); @@ -110,7 +110,7 @@ public: Impl::max_string_size); } } - Impl::vectorConstant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), needle, vec_res); + Impl::vectorConstant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), needle, vec_res, input_rows_count); } else if (col_haystack_vector && col_needle_vector) { @@ -119,7 +119,8 @@ public: col_haystack_vector->getOffsets(), col_needle_vector->getChars(), col_needle_vector->getOffsets(), - vec_res); + vec_res, + input_rows_count); } else if (col_haystack_const && col_needle_vector) { @@ -136,7 +137,7 @@ public: Impl::max_string_size); } } - Impl::constantVector(haystack, col_needle_vector->getChars(), col_needle_vector->getOffsets(), vec_res); + Impl::constantVector(haystack, col_needle_vector->getChars(), col_needle_vector->getOffsets(), vec_res, input_rows_count); } else { diff --git a/src/Functions/FunctionsTextClassification.h b/src/Functions/FunctionsTextClassification.h index 90e8af06ccc..d5cba690f81 100644 --- a/src/Functions/FunctionsTextClassification.h +++ b/src/Functions/FunctionsTextClassification.h @@ -55,7 +55,7 @@ public: return arguments[0]; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override { const ColumnPtr & column = arguments[0].column; const ColumnString * col = checkAndGetColumn(column.get()); @@ -65,7 +65,7 @@ public: arguments[0].column->getName(), getName()); auto col_res = ColumnString::create(); - Impl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets()); + Impl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), input_rows_count); return col_res; } }; @@ -104,7 +104,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override { const ColumnPtr & column = arguments[0].column; const ColumnString * col = checkAndGetColumn(column.get()); @@ -115,9 +115,9 @@ public: auto col_res = ColumnVector::create(); ColumnVector::Container & vec_res = col_res->getData(); - vec_res.resize(col->size()); + vec_res.resize(input_rows_count); - Impl::vector(col->getChars(), col->getOffsets(), vec_res); + Impl::vector(col->getChars(), col->getOffsets(), vec_res, input_rows_count); return col_res; } }; diff --git a/src/Functions/FunctionsTimeWindow.cpp b/src/Functions/FunctionsTimeWindow.cpp index f93a885ee65..77d740803be 100644 --- a/src/Functions/FunctionsTimeWindow.cpp +++ b/src/Functions/FunctionsTimeWindow.cpp @@ -130,7 +130,7 @@ struct TimeWindowImpl static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name); - static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name); + static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count); }; template @@ -196,7 +196,7 @@ struct TimeWindowImpl return std::make_shared(DataTypes{data_type, data_type}); } - static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name) + static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count) { const auto & time_column = arguments[0]; const auto & interval_column = arguments[1]; @@ -214,38 +214,37 @@ struct TimeWindowImpl { /// TODO: add proper support for fractional seconds case IntervalKind::Kind::Second: - return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); + return executeTumble(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count); case IntervalKind::Kind::Minute: - return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); + return executeTumble(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count); case IntervalKind::Kind::Hour: - return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); + return executeTumble(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count); case IntervalKind::Kind::Day: - return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); + return executeTumble(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count); case IntervalKind::Kind::Week: - return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); + return executeTumble(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count); case IntervalKind::Kind::Month: - return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); + return executeTumble(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count); case IntervalKind::Kind::Quarter: - return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); + return executeTumble(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count); case IntervalKind::Kind::Year: - return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); + return executeTumble(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count); default: throw Exception(ErrorCodes::SYNTAX_ERROR, "Fraction seconds are unsupported by windows yet"); } } template - static ColumnPtr executeTumble(const ColumnDateTime & time_column, UInt64 num_units, const DateLUTImpl & time_zone) + static ColumnPtr executeTumble(const ColumnDateTime & time_column, UInt64 num_units, const DateLUTImpl & time_zone, size_t input_rows_count) { const auto & time_data = time_column.getData(); - size_t size = time_column.size(); auto start = ColumnVector::create(); auto end = ColumnVector::create(); auto & start_data = start->getData(); auto & end_data = end->getData(); - start_data.resize(size); - end_data.resize(size); - for (size_t i = 0; i != size; ++i) + start_data.resize(input_rows_count); + end_data.resize(input_rows_count); + for (size_t i = 0; i != input_rows_count; ++i) { start_data[i] = ToStartOfTransform::execute(time_data[i], num_units, time_zone); end_data[i] = AddTime::execute(start_data[i], num_units, time_zone); @@ -283,7 +282,7 @@ struct TimeWindowImpl } } - [[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name) + [[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count) { const auto & time_column = arguments[0]; const auto which_type = WhichDataType(time_column.type); @@ -296,7 +295,7 @@ struct TimeWindowImpl result_column = time_column.column; } else - result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name); + result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name, input_rows_count); return executeWindowBound(result_column, 0, function_name); } }; @@ -311,7 +310,7 @@ struct TimeWindowImpl return TimeWindowImpl::getReturnType(arguments, function_name); } - [[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String& function_name) + [[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String& function_name, size_t input_rows_count) { const auto & time_column = arguments[0]; const auto which_type = WhichDataType(time_column.type); @@ -324,7 +323,7 @@ struct TimeWindowImpl result_column = time_column.column; } else - result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name); + result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name, input_rows_count); return executeWindowBound(result_column, 1, function_name); } }; @@ -372,7 +371,7 @@ struct TimeWindowImpl return std::make_shared(DataTypes{data_type, data_type}); } - static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name) + static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count) { const auto & time_column = arguments[0]; const auto & hop_interval_column = arguments[1]; @@ -396,28 +395,28 @@ struct TimeWindowImpl /// TODO: add proper support for fractional seconds case IntervalKind::Kind::Second: return executeHop( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Minute: return executeHop( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Hour: return executeHop( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Day: return executeHop( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Week: return executeHop( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Month: return executeHop( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Quarter: return executeHop( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Year: return executeHop( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); default: throw Exception(ErrorCodes::SYNTAX_ERROR, "Fraction seconds are unsupported by windows yet"); } @@ -425,18 +424,17 @@ struct TimeWindowImpl template static ColumnPtr - executeHop(const ColumnDateTime & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone) + executeHop(const ColumnDateTime & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone, size_t input_rows_count) { const auto & time_data = time_column.getData(); - size_t size = time_column.size(); auto start = ColumnVector::create(); auto end = ColumnVector::create(); auto & start_data = start->getData(); auto & end_data = end->getData(); - start_data.resize(size); - end_data.resize(size); + start_data.resize(input_rows_count); + end_data.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { ToType wstart = ToStartOfTransform::execute(time_data[i], hop_num_units, time_zone); ToType wend = AddTime::execute(wstart, hop_num_units, time_zone); @@ -509,7 +507,7 @@ struct TimeWindowImpl return std::make_shared(); } - static ColumnPtr dispatchForHopColumns(const ColumnsWithTypeAndName & arguments, const String & function_name) + static ColumnPtr dispatchForHopColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count) { const auto & time_column = arguments[0]; const auto & hop_interval_column = arguments[1]; @@ -533,28 +531,28 @@ struct TimeWindowImpl /// TODO: add proper support for fractional seconds case IntervalKind::Kind::Second: return executeHopSlice( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Minute: return executeHopSlice( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Hour: return executeHopSlice( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Day: return executeHopSlice( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Week: return executeHopSlice( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Month: return executeHopSlice( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Quarter: return executeHopSlice( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Year: return executeHopSlice( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); default: throw Exception(ErrorCodes::SYNTAX_ERROR, "Fraction seconds are unsupported by windows yet"); } @@ -563,17 +561,16 @@ struct TimeWindowImpl template static ColumnPtr - executeHopSlice(const ColumnDateTime & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone) + executeHopSlice(const ColumnDateTime & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone, size_t input_rows_count) { Int64 gcd_num_units = std::gcd(hop_num_units, window_num_units); const auto & time_data = time_column.getData(); - size_t size = time_column.size(); auto end = ColumnVector::create(); auto & end_data = end->getData(); - end_data.resize(size); - for (size_t i = 0; i < size; ++i) + end_data.resize(input_rows_count); + for (size_t i = 0; i < input_rows_count; ++i) { ToType wstart = ToStartOfTransform::execute(time_data[i], hop_num_units, time_zone); ToType wend = AddTime::execute(wstart, hop_num_units, time_zone); @@ -593,23 +590,23 @@ struct TimeWindowImpl return end; } - static ColumnPtr dispatchForTumbleColumns(const ColumnsWithTypeAndName & arguments, const String & function_name) + static ColumnPtr dispatchForTumbleColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count) { - ColumnPtr column = TimeWindowImpl::dispatchForColumns(arguments, function_name); + ColumnPtr column = TimeWindowImpl::dispatchForColumns(arguments, function_name, input_rows_count); return executeWindowBound(column, 1, function_name); } - static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name) + static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count) { if (arguments.size() == 2) - return dispatchForTumbleColumns(arguments, function_name); + return dispatchForTumbleColumns(arguments, function_name, input_rows_count); else { const auto & third_column = arguments[2]; if (arguments.size() == 3 && WhichDataType(third_column.type).isString()) - return dispatchForTumbleColumns(arguments, function_name); + return dispatchForTumbleColumns(arguments, function_name, input_rows_count); else - return dispatchForHopColumns(arguments, function_name); + return dispatchForHopColumns(arguments, function_name, input_rows_count); } } }; @@ -639,7 +636,7 @@ struct TimeWindowImpl } } - static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name) + static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count) { const auto & time_column = arguments[0]; const auto which_type = WhichDataType(time_column.type); @@ -652,7 +649,7 @@ struct TimeWindowImpl result_column = time_column.column; } else - result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name); + result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name, input_rows_count); return executeWindowBound(result_column, 0, function_name); } }; @@ -667,7 +664,7 @@ struct TimeWindowImpl return TimeWindowImpl::getReturnType(arguments, function_name); } - static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name) + static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count) { const auto & time_column = arguments[0]; const auto which_type = WhichDataType(time_column.type); @@ -680,7 +677,7 @@ struct TimeWindowImpl result_column = time_column.column; } else - result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name); + result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name, input_rows_count); return executeWindowBound(result_column, 1, function_name); } @@ -693,9 +690,9 @@ DataTypePtr FunctionTimeWindow::getReturnTypeImpl(const ColumnsWithTypeAnd } template -ColumnPtr FunctionTimeWindow::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const +ColumnPtr FunctionTimeWindow::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const { - return TimeWindowImpl::dispatchForColumns(arguments, name); + return TimeWindowImpl::dispatchForColumns(arguments, name, input_rows_count); } } diff --git a/src/Functions/FunctionsTonalityClassification.cpp b/src/Functions/FunctionsTonalityClassification.cpp index a9321819a26..7627c68c057 100644 --- a/src/Functions/FunctionsTonalityClassification.cpp +++ b/src/Functions/FunctionsTonalityClassification.cpp @@ -18,7 +18,7 @@ namespace DB */ struct FunctionDetectTonalityImpl { - static ALWAYS_INLINE inline Float32 detectTonality( + static Float32 detectTonality( const UInt8 * str, const size_t str_len, const FrequencyHolder::Map & emotional_dict) @@ -63,13 +63,13 @@ struct FunctionDetectTonalityImpl static void vector( const ColumnString::Chars & data, const ColumnString::Offsets & offsets, - PaddedPODArray & res) + PaddedPODArray & res, + size_t input_rows_count) { const auto & emotional_dict = FrequencyHolder::getInstance().getEmotionalDict(); - size_t size = offsets.size(); size_t prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { res[i] = detectTonality(data.data() + prev_offset, offsets[i] - 1 - prev_offset, emotional_dict); prev_offset = offsets[i]; diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index 8b092ba9b6e..31695fc95d5 100644 --- a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -47,85 +47,54 @@ bool allArgumentsAreConstants(const ColumnsWithTypeAndName & args) return true; } -/// Replaces single low cardinality column in a function call by its dictionary -/// This can only happen after the arguments have been adapted in IFunctionOverloadResolver::getReturnType -/// as it's only possible if there is one low cardinality column and, optionally, const columns ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( ColumnsWithTypeAndName & args, bool can_be_executed_on_default_arguments, size_t input_rows_count) { - /// We return the LC indexes so the LC can be reconstructed with the function result + size_t num_rows = input_rows_count; ColumnPtr indexes; - size_t number_low_cardinality_columns = 0; - size_t last_low_cardinality = 0; - size_t number_const_columns = 0; - size_t number_full_columns = 0; - - for (size_t i = 0; i < args.size(); i++) + /// Find first LowCardinality column and replace it to nested dictionary. + for (auto & column : args) { - auto const & arg = args[i]; - if (checkAndGetColumn(arg.column.get())) + if (const auto * low_cardinality_column = checkAndGetColumn(column.column.get())) { - number_low_cardinality_columns++; - last_low_cardinality = i; + /// Single LowCardinality column is supported now. + if (indexes) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected single dictionary argument for function."); + + const auto * low_cardinality_type = checkAndGetDataType(column.type.get()); + + if (!low_cardinality_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Incompatible type for LowCardinality column: {}", + column.type->getName()); + + if (can_be_executed_on_default_arguments) + { + /// Normal case, when function can be executed on values' default. + column.column = low_cardinality_column->getDictionary().getNestedColumn(); + indexes = low_cardinality_column->getIndexesPtr(); + } + else + { + /// Special case when default value can't be used. Example: 1 % LowCardinality(Int). + /// LowCardinality always contains default, so 1 % 0 will throw exception in normal case. + auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size()); + column.column = dict_encoded.dictionary; + indexes = dict_encoded.indexes; + } + + num_rows = column.column->size(); + column.type = low_cardinality_type->getDictionaryType(); } - else if (checkAndGetColumn(arg.column.get())) - number_const_columns++; - else - number_full_columns++; } - if (!number_low_cardinality_columns && !number_const_columns) - return nullptr; - - if (number_full_columns > 0 || number_low_cardinality_columns > 1) - { - /// This should not be possible but currently there are multiple tests in CI failing because of it - /// TODO: Fix those cases, then enable this exception -#if 0 - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected low cardinality types found. Low cardinality: {}. Full {}. Const {}", - number_low_cardinality_columns, number_full_columns, number_const_columns); -#else - return nullptr; -#endif - } - else if (number_low_cardinality_columns == 1) - { - auto & lc_arg = args[last_low_cardinality]; - - const auto * low_cardinality_type = checkAndGetDataType(lc_arg.type.get()); - if (!low_cardinality_type) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Incompatible type for LowCardinality column: {}", lc_arg.type->getName()); - - const auto * low_cardinality_column = checkAndGetColumn(lc_arg.column.get()); - chassert(low_cardinality_column); - - if (can_be_executed_on_default_arguments) - { - /// Normal case, when function can be executed on values' default. - lc_arg.column = low_cardinality_column->getDictionary().getNestedColumn(); - indexes = low_cardinality_column->getIndexesPtr(); - } - else - { - /// Special case when default value can't be used. Example: 1 % LowCardinality(Int). - /// LowCardinality always contains default, so 1 % 0 will throw exception in normal case. - auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size()); - lc_arg.column = dict_encoded.dictionary; - indexes = dict_encoded.indexes; - } - - /// The new column will have a different number of rows, normally less but occasionally it might be more (NULL) - input_rows_count = lc_arg.column->size(); - lc_arg.type = low_cardinality_type->getDictionaryType(); - } - - /// Change size of constants + /// Change size of constants. for (auto & column : args) { if (const auto * column_const = checkAndGetColumn(column.column.get())) { - column.column = ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), input_rows_count); + column.column = ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), num_rows); column.type = recursiveRemoveLowCardinality(column.type); } } @@ -301,8 +270,6 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType bool can_be_executed_on_default_arguments = canBeExecutedOnDefaultArguments(); const auto & dictionary_type = res_low_cardinality_type->getDictionaryType(); - /// The arguments should have been adapted in IFunctionOverloadResolver::getReturnType - /// So there is only one low cardinality column (and optionally some const columns) and no full column ColumnPtr indexes = replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( columns_without_low_cardinality, can_be_executed_on_default_arguments, input_rows_count); diff --git a/src/Functions/Kusto/KqlArraySort.cpp b/src/Functions/Kusto/KqlArraySort.cpp index 11157aa53e6..fb3e6259ee4 100644 --- a/src/Functions/Kusto/KqlArraySort.cpp +++ b/src/Functions/Kusto/KqlArraySort.cpp @@ -73,13 +73,11 @@ public: size_t array_count = arguments.size(); const auto & last_arg = arguments[array_count - 1]; - size_t input_rows_count_local = input_rows_count; - bool null_last = true; if (!isArray(last_arg.type)) { --array_count; - null_last = check_condition(last_arg, context, input_rows_count_local); + null_last = check_condition(last_arg, context, input_rows_count); } ColumnsWithTypeAndName new_args; @@ -119,11 +117,11 @@ public: } auto zipped - = FunctionFactory::instance().get("arrayZip", context)->build(new_args)->execute(new_args, result_type, input_rows_count_local); + = FunctionFactory::instance().get("arrayZip", context)->build(new_args)->execute(new_args, result_type, input_rows_count); ColumnsWithTypeAndName sort_arg({{zipped, std::make_shared(result_type), "zipped"}}); auto sorted_tuple - = FunctionFactory::instance().get(sort_function, context)->build(sort_arg)->execute(sort_arg, result_type, input_rows_count_local); + = FunctionFactory::instance().get(sort_function, context)->build(sort_arg)->execute(sort_arg, result_type, input_rows_count); auto null_type = std::make_shared(std::make_shared()); @@ -139,10 +137,10 @@ public: = std::make_shared(makeNullable(nested_types[i])); ColumnsWithTypeAndName null_array_arg({ - {null_type->createColumnConstWithDefaultValue(input_rows_count_local), null_type, "NULL"}, + {null_type->createColumnConstWithDefaultValue(input_rows_count), null_type, "NULL"}, }); - tuple_columns[i] = fun_array->build(null_array_arg)->execute(null_array_arg, arg_type, input_rows_count_local); + tuple_columns[i] = fun_array->build(null_array_arg)->execute(null_array_arg, arg_type, input_rows_count); tuple_columns[i] = tuple_columns[i]->convertToFullColumnIfConst(); } else @@ -153,7 +151,7 @@ public: auto tuple_coulmn = FunctionFactory::instance() .get("tupleElement", context) ->build(untuple_args) - ->execute(untuple_args, result_type, input_rows_count_local); + ->execute(untuple_args, result_type, input_rows_count); auto out_tmp = ColumnArray::create(nested_types[i]->createColumn()); @@ -183,7 +181,7 @@ public: auto inside_null_type = nested_types[0]; ColumnsWithTypeAndName indexof_args({ arg_of_index, - {inside_null_type->createColumnConstWithDefaultValue(input_rows_count_local), inside_null_type, "NULL"}, + {inside_null_type->createColumnConstWithDefaultValue(input_rows_count), inside_null_type, "NULL"}, }); auto null_index_datetype = std::make_shared(); @@ -192,7 +190,7 @@ public: slice_index.column = FunctionFactory::instance() .get("indexOf", context) ->build(indexof_args) - ->execute(indexof_args, result_type, input_rows_count_local); + ->execute(indexof_args, result_type, input_rows_count); auto null_index_in_array = slice_index.column->get64(0); if (null_index_in_array > 0) @@ -220,15 +218,15 @@ public: ColumnsWithTypeAndName slice_args_right( {{ColumnWithTypeAndName(tuple_columns[i], arg_type, "array")}, slice_index}); ColumnWithTypeAndName arr_left{ - fun_slice->build(slice_args_left)->execute(slice_args_left, arg_type, input_rows_count_local), arg_type, ""}; + fun_slice->build(slice_args_left)->execute(slice_args_left, arg_type, input_rows_count), arg_type, ""}; ColumnWithTypeAndName arr_right{ - fun_slice->build(slice_args_right)->execute(slice_args_right, arg_type, input_rows_count_local), arg_type, ""}; + fun_slice->build(slice_args_right)->execute(slice_args_right, arg_type, input_rows_count), arg_type, ""}; ColumnsWithTypeAndName arr_cancat({arr_right, arr_left}); auto out_tmp = FunctionFactory::instance() .get("arrayConcat", context) ->build(arr_cancat) - ->execute(arr_cancat, arg_type, input_rows_count_local); + ->execute(arr_cancat, arg_type, input_rows_count); adjusted_columns[i] = std::move(out_tmp); } } diff --git a/src/Functions/LowerUpperImpl.h b/src/Functions/LowerUpperImpl.h index 72b3ce1ca34..d463ef96e16 100644 --- a/src/Functions/LowerUpperImpl.h +++ b/src/Functions/LowerUpperImpl.h @@ -8,17 +8,19 @@ namespace DB template struct LowerUpperImpl { - static void vector(const ColumnString::Chars & data, + static void vector( + const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t /*input_rows_count*/) { res_data.resize_exact(data.size()); res_offsets.assign(offsets); array(data.data(), data.data() + data.size(), res_data.data()); } - static void vectorFixed(const ColumnString::Chars & data, size_t /*n*/, ColumnString::Chars & res_data) + static void vectorFixed(const ColumnString::Chars & data, size_t /*n*/, ColumnString::Chars & res_data, size_t /*input_rows_count*/) { res_data.resize_exact(data.size()); array(data.data(), data.data() + data.size(), res_data.data()); diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h index eebba7b9d5f..eedabca5b22 100644 --- a/src/Functions/LowerUpperUTF8Impl.h +++ b/src/Functions/LowerUpperUTF8Impl.h @@ -90,7 +90,8 @@ struct LowerUpperUTF8Impl const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { if (data.empty()) return; @@ -98,7 +99,7 @@ struct LowerUpperUTF8Impl bool all_ascii = isAllASCII(data.data(), data.size()); if (all_ascii) { - LowerUpperImpl::vector(data, offsets, res_data, res_offsets); + LowerUpperImpl::vector(data, offsets, res_data, res_offsets, input_rows_count); return; } @@ -107,7 +108,7 @@ struct LowerUpperUTF8Impl array(data.data(), data.data() + data.size(), offsets, res_data.data()); } - static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Functions lowerUTF8 and upperUTF8 cannot work with FixedString argument"); } diff --git a/src/Functions/PolygonUtils.h b/src/Functions/PolygonUtils.h index 57f1243537d..bf8241774a6 100644 --- a/src/Functions/PolygonUtils.h +++ b/src/Functions/PolygonUtils.h @@ -124,7 +124,7 @@ public: bool hasEmptyBound() const { return has_empty_bound; } - inline bool ALWAYS_INLINE contains(CoordinateType x, CoordinateType y) const + inline bool contains(CoordinateType x, CoordinateType y) const { Point point(x, y); @@ -167,7 +167,7 @@ public: UInt64 getAllocatedBytes() const; - inline bool ALWAYS_INLINE contains(CoordinateType x, CoordinateType y) const; + bool contains(CoordinateType x, CoordinateType y) const; private: enum class CellType : uint8_t @@ -199,7 +199,7 @@ private: } /// Inner part of the HalfPlane is the left side of initialized vector. - bool ALWAYS_INLINE contains(CoordinateType x, CoordinateType y) const { return a * x + b * y + c >= 0; } + bool contains(CoordinateType x, CoordinateType y) const { return a * x + b * y + c >= 0; } }; struct Cell @@ -233,7 +233,7 @@ private: void calcGridAttributes(Box & box); template - T ALWAYS_INLINE getCellIndex(T row, T col) const { return row * grid_size + col; } + T getCellIndex(T row, T col) const { return row * grid_size + col; } /// Complex case. Will check intersection directly. inline void addComplexPolygonCell(size_t index, const Box & box); diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index f5fb08f71d2..14f5a2d7932 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -201,15 +201,15 @@ struct ReplaceRegexpImpl const String & needle, const String & replacement, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { if (needle.empty()) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name); ColumnString::Offset res_offset = 0; res_data.reserve(haystack_data.size()); - size_t haystack_size = haystack_offsets.size(); - res_offsets.resize(haystack_size); + res_offsets.resize(input_rows_count); re2::RE2::Options regexp_options; regexp_options.set_log_errors(false); /// don't write error messages to stderr @@ -232,13 +232,13 @@ struct ReplaceRegexpImpl case ReplaceRegexpTraits::Replace::All: return ReplaceStringTraits::Replace::All; } }; - ReplaceStringImpl::vectorConstantConstant(haystack_data, haystack_offsets, needle, replacement, res_data, res_offsets); + ReplaceStringImpl::vectorConstantConstant(haystack_data, haystack_offsets, needle, replacement, res_data, res_offsets, input_rows_count); return; } Instructions instructions = createInstructions(replacement, num_captures); - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t from = i > 0 ? haystack_offsets[i - 1] : 0; @@ -257,19 +257,19 @@ struct ReplaceRegexpImpl const ColumnString::Offsets & needle_offsets, const String & replacement, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { assert(haystack_offsets.size() == needle_offsets.size()); ColumnString::Offset res_offset = 0; res_data.reserve(haystack_data.size()); - size_t haystack_size = haystack_offsets.size(); - res_offsets.resize(haystack_size); + res_offsets.resize(input_rows_count); re2::RE2::Options regexp_options; regexp_options.set_log_errors(false); /// don't write error messages to stderr - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0; const char * hs_data = reinterpret_cast(haystack_data.data() + hs_from); @@ -302,7 +302,8 @@ struct ReplaceRegexpImpl const ColumnString::Chars & replacement_data, const ColumnString::Offsets & replacement_offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { assert(haystack_offsets.size() == replacement_offsets.size()); @@ -311,8 +312,7 @@ struct ReplaceRegexpImpl ColumnString::Offset res_offset = 0; res_data.reserve(haystack_data.size()); - size_t haystack_size = haystack_offsets.size(); - res_offsets.resize(haystack_size); + res_offsets.resize(input_rows_count); re2::RE2::Options regexp_options; regexp_options.set_log_errors(false); /// don't write error messages to stderr @@ -323,7 +323,7 @@ struct ReplaceRegexpImpl int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures); - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0; const char * hs_data = reinterpret_cast(haystack_data.data() + hs_from); @@ -349,20 +349,20 @@ struct ReplaceRegexpImpl const ColumnString::Chars & replacement_data, const ColumnString::Offsets & replacement_offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { assert(haystack_offsets.size() == needle_offsets.size()); assert(needle_offsets.size() == replacement_offsets.size()); ColumnString::Offset res_offset = 0; res_data.reserve(haystack_data.size()); - size_t haystack_size = haystack_offsets.size(); - res_offsets.resize(haystack_size); + res_offsets.resize(input_rows_count); re2::RE2::Options regexp_options; regexp_options.set_log_errors(false); /// don't write error messages to stderr - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0; const char * hs_data = reinterpret_cast(haystack_data.data() + hs_from); @@ -399,15 +399,15 @@ struct ReplaceRegexpImpl const String & needle, const String & replacement, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { if (needle.empty()) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name); ColumnString::Offset res_offset = 0; - size_t haystack_size = haystack_data.size() / n; res_data.reserve(haystack_data.size()); - res_offsets.resize(haystack_size); + res_offsets.resize(input_rows_count); re2::RE2::Options regexp_options; regexp_options.set_log_errors(false); /// don't write error messages to stderr @@ -419,7 +419,7 @@ struct ReplaceRegexpImpl int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures); Instructions instructions = createInstructions(replacement, num_captures); - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t from = i * n; const char * hs_data = reinterpret_cast(haystack_data.data() + from); diff --git a/src/Functions/ReplaceStringImpl.h b/src/Functions/ReplaceStringImpl.h index de3942acbd8..7c56d657b3e 100644 --- a/src/Functions/ReplaceStringImpl.h +++ b/src/Functions/ReplaceStringImpl.h @@ -35,7 +35,8 @@ struct ReplaceStringImpl const String & needle, const String & replacement, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { if (needle.empty()) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name); @@ -46,8 +47,7 @@ struct ReplaceStringImpl ColumnString::Offset res_offset = 0; res_data.reserve(haystack_data.size()); - const size_t haystack_size = haystack_offsets.size(); - res_offsets.resize(haystack_size); + res_offsets.resize(input_rows_count); /// The current index in the array of strings. size_t i = 0; @@ -124,21 +124,20 @@ struct ReplaceStringImpl const ColumnString::Offsets & needle_offsets, const String & replacement, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { chassert(haystack_offsets.size() == needle_offsets.size()); - const size_t haystack_size = haystack_offsets.size(); - res_data.reserve(haystack_data.size()); - res_offsets.resize(haystack_size); + res_offsets.resize(input_rows_count); ColumnString::Offset res_offset = 0; size_t prev_haystack_offset = 0; size_t prev_needle_offset = 0; - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset]; const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1; @@ -195,24 +194,23 @@ struct ReplaceStringImpl const ColumnString::Chars & replacement_data, const ColumnString::Offsets & replacement_offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { chassert(haystack_offsets.size() == replacement_offsets.size()); if (needle.empty()) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name); - const size_t haystack_size = haystack_offsets.size(); - res_data.reserve(haystack_data.size()); - res_offsets.resize(haystack_size); + res_offsets.resize(input_rows_count); ColumnString::Offset res_offset = 0; size_t prev_haystack_offset = 0; size_t prev_replacement_offset = 0; - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset]; const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1; @@ -267,15 +265,14 @@ struct ReplaceStringImpl const ColumnString::Chars & replacement_data, const ColumnString::Offsets & replacement_offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { chassert(haystack_offsets.size() == needle_offsets.size()); chassert(needle_offsets.size() == replacement_offsets.size()); - const size_t haystack_size = haystack_offsets.size(); - res_data.reserve(haystack_data.size()); - res_offsets.resize(haystack_size); + res_offsets.resize(input_rows_count); ColumnString::Offset res_offset = 0; @@ -283,7 +280,7 @@ struct ReplaceStringImpl size_t prev_needle_offset = 0; size_t prev_replacement_offset = 0; - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset]; const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1; @@ -345,7 +342,8 @@ struct ReplaceStringImpl const String & needle, const String & replacement, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { if (needle.empty()) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name); @@ -355,9 +353,8 @@ struct ReplaceStringImpl const UInt8 * pos = begin; ColumnString::Offset res_offset = 0; - size_t haystack_size = haystack_data.size() / n; res_data.reserve(haystack_data.size()); - res_offsets.resize(haystack_size); + res_offsets.resize(input_rows_count); /// The current index in the string array. size_t i = 0; @@ -384,13 +381,13 @@ struct ReplaceStringImpl /// Copy skipped strings without any changes but /// add zero byte to the end of each string. - while (i < haystack_size && begin + n * (i + 1) <= match) + while (i < input_rows_count && begin + n * (i + 1) <= match) { COPY_REST_OF_CURRENT_STRING(); } /// If you have reached the end, it's time to stop - if (i == haystack_size) + if (i == input_rows_count) break; /// Copy unchanged part of current string. diff --git a/src/Functions/StringHelpers.h b/src/Functions/StringHelpers.h index 8f3a87d5d0e..24d8fe86c62 100644 --- a/src/Functions/StringHelpers.h +++ b/src/Functions/StringHelpers.h @@ -62,12 +62,13 @@ using Pos = const char *; template struct ExtractSubstringImpl { - static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, - ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) + static void vector( + const ColumnString::Chars & data, const ColumnString::Offsets & offsets, + ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets, + size_t input_rows_count) { - size_t size = offsets.size(); - res_offsets.resize(size); - res_data.reserve(size * Extractor::getReserveLengthForElement()); + res_offsets.resize(input_rows_count); + res_data.reserve(input_rows_count * Extractor::getReserveLengthForElement()); size_t prev_offset = 0; size_t res_offset = 0; @@ -76,7 +77,7 @@ struct ExtractSubstringImpl Pos start; size_t length; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Extractor::execute(reinterpret_cast(&data[prev_offset]), offsets[i] - prev_offset - 1, start, length); @@ -99,7 +100,7 @@ struct ExtractSubstringImpl res_data.assign(start, length); } - static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by this function"); } @@ -111,12 +112,13 @@ struct ExtractSubstringImpl template struct CutSubstringImpl { - static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, - ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) + static void vector( + const ColumnString::Chars & data, const ColumnString::Offsets & offsets, + ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets, + size_t input_rows_count) { res_data.reserve(data.size()); - size_t size = offsets.size(); - res_offsets.resize(size); + res_offsets.resize(input_rows_count); size_t prev_offset = 0; size_t res_offset = 0; @@ -125,7 +127,7 @@ struct CutSubstringImpl Pos start; size_t length; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const char * current = reinterpret_cast(&data[prev_offset]); Extractor::execute(current, offsets[i] - prev_offset - 1, start, length); @@ -154,7 +156,7 @@ struct CutSubstringImpl res_data.append(start + length, data.data() + data.size()); } - static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by this function"); } diff --git a/src/Functions/URL/ExtractFirstSignificantSubdomain.h b/src/Functions/URL/ExtractFirstSignificantSubdomain.h index 0d1b1cac8ef..4316fd2fc3a 100644 --- a/src/Functions/URL/ExtractFirstSignificantSubdomain.h +++ b/src/Functions/URL/ExtractFirstSignificantSubdomain.h @@ -1,8 +1,8 @@ #pragma once #include -#include "domain.h" -#include "tldLookup.h" +#include +#include #include /// TLDType namespace DB diff --git a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h index 68582198ea3..22fec17654c 100644 --- a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h +++ b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h @@ -64,7 +64,7 @@ public: return arguments[0].type; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override { const ColumnConst * column_tld_list_name = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get()); FirstSignificantSubdomainCustomLookup tld_lookup(column_tld_list_name->getValue()); @@ -72,7 +72,7 @@ public: if (const ColumnString * col = checkAndGetColumn(&*arguments[0].column)) { auto col_res = ColumnString::create(); - vector(tld_lookup, col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets()); + vector(tld_lookup, col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), input_rows_count); return col_res; } else @@ -82,11 +82,11 @@ public: static void vector(FirstSignificantSubdomainCustomLookup & tld_lookup, const ColumnString::Chars & data, const ColumnString::Offsets & offsets, - ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) + ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets, + size_t input_rows_count) { - size_t size = offsets.size(); - res_offsets.resize(size); - res_data.reserve(size * Extractor::getReserveLengthForElement()); + res_offsets.resize(input_rows_count); + res_data.reserve(input_rows_count * Extractor::getReserveLengthForElement()); size_t prev_offset = 0; size_t res_offset = 0; @@ -95,7 +95,7 @@ public: Pos start; size_t length; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Extractor::execute(tld_lookup, reinterpret_cast(&data[prev_offset]), offsets[i] - prev_offset - 1, start, length); diff --git a/src/Functions/URL/cutFragment.cpp b/src/Functions/URL/cutFragment.cpp index 3b99edf61a3..b32c4410190 100644 --- a/src/Functions/URL/cutFragment.cpp +++ b/src/Functions/URL/cutFragment.cpp @@ -1,6 +1,6 @@ #include -#include "fragment.h" #include +#include namespace DB { diff --git a/src/Functions/URL/cutQueryString.cpp b/src/Functions/URL/cutQueryString.cpp index 2886adc2e36..d2fa4a004f2 100644 --- a/src/Functions/URL/cutQueryString.cpp +++ b/src/Functions/URL/cutQueryString.cpp @@ -1,6 +1,6 @@ #include -#include "queryString.h" #include +#include namespace DB { diff --git a/src/Functions/URL/cutQueryStringAndFragment.cpp b/src/Functions/URL/cutQueryStringAndFragment.cpp index 4116b352542..ff427beb277 100644 --- a/src/Functions/URL/cutQueryStringAndFragment.cpp +++ b/src/Functions/URL/cutQueryStringAndFragment.cpp @@ -1,6 +1,6 @@ #include -#include "queryStringAndFragment.h" #include +#include namespace DB { diff --git a/src/Functions/URL/cutToFirstSignificantSubdomain.cpp b/src/Functions/URL/cutToFirstSignificantSubdomain.cpp index 6e64b0b6ab8..454a241c54f 100644 --- a/src/Functions/URL/cutToFirstSignificantSubdomain.cpp +++ b/src/Functions/URL/cutToFirstSignificantSubdomain.cpp @@ -1,6 +1,6 @@ #include #include -#include "ExtractFirstSignificantSubdomain.h" +#include namespace DB diff --git a/src/Functions/URL/cutToFirstSignificantSubdomainCustom.cpp b/src/Functions/URL/cutToFirstSignificantSubdomainCustom.cpp index 77f40e465a6..7612b6ea4af 100644 --- a/src/Functions/URL/cutToFirstSignificantSubdomainCustom.cpp +++ b/src/Functions/URL/cutToFirstSignificantSubdomainCustom.cpp @@ -1,6 +1,6 @@ #include -#include "ExtractFirstSignificantSubdomain.h" -#include "FirstSignificantSubdomainCustomImpl.h" +#include +#include namespace DB { diff --git a/src/Functions/URL/cutURLParameter.cpp b/src/Functions/URL/cutURLParameter.cpp index 7a2b96ec874..3ab9cad1ea7 100644 --- a/src/Functions/URL/cutURLParameter.cpp +++ b/src/Functions/URL/cutURLParameter.cpp @@ -44,7 +44,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnPtr column = arguments[0].column; const ColumnPtr column_needle = arguments[1].column; @@ -71,7 +71,7 @@ public: ColumnString::Chars & vec_res = col_res->getChars(); ColumnString::Offsets & offsets_res = col_res->getOffsets(); - vector(col->getChars(), col->getOffsets(), col_needle, col_needle_const_array, vec_res, offsets_res); + vector(col->getChars(), col->getOffsets(), col_needle, col_needle_const_array, vec_res, offsets_res, input_rows_count); return col_res; } else @@ -130,7 +130,8 @@ public: const ColumnString::Offsets & offsets, const ColumnConst * col_needle, const ColumnArray * col_needle_const_array, - ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) + ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets, + size_t input_rows_count) { res_data.reserve(data.size()); res_offsets.resize(offsets.size()); @@ -141,7 +142,7 @@ public: size_t res_offset = 0; size_t cur_res_offset; - for (size_t i = 0; i < offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { cur_offset = offsets[i]; cur_len = cur_offset - prev_offset; diff --git a/src/Functions/URL/cutWWW.cpp b/src/Functions/URL/cutWWW.cpp index 992d5128440..ab3fae6b094 100644 --- a/src/Functions/URL/cutWWW.cpp +++ b/src/Functions/URL/cutWWW.cpp @@ -1,6 +1,6 @@ #include #include -#include "protocol.h" +#include #include diff --git a/src/Functions/URL/decodeURLComponent.cpp b/src/Functions/URL/decodeURLComponent.cpp index 05e3fbea3fd..bf4aaa6d5e3 100644 --- a/src/Functions/URL/decodeURLComponent.cpp +++ b/src/Functions/URL/decodeURLComponent.cpp @@ -1,7 +1,7 @@ -#include #include #include #include +#include namespace DB @@ -121,8 +121,10 @@ enum URLCodeStrategy template struct CodeURLComponentImpl { - static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, - ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) + static void vector( + const ColumnString::Chars & data, const ColumnString::Offsets & offsets, + ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets, + size_t input_rows_count) { if (code_strategy == encode) { @@ -134,13 +136,12 @@ struct CodeURLComponentImpl res_data.resize(data.size()); } - size_t size = offsets.size(); - res_offsets.resize(size); + res_offsets.resize(input_rows_count); size_t prev_offset = 0; size_t res_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const char * src_data = reinterpret_cast(&data[prev_offset]); size_t src_size = offsets[i] - prev_offset; @@ -165,7 +166,7 @@ struct CodeURLComponentImpl res_data.resize(res_offset); } - [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by URL functions"); } diff --git a/src/Functions/URL/domain.cpp b/src/Functions/URL/domain.cpp index 443a3323075..68724b57a70 100644 --- a/src/Functions/URL/domain.cpp +++ b/src/Functions/URL/domain.cpp @@ -1,5 +1,4 @@ -#include "domain.h" - +#include #include #include diff --git a/src/Functions/URL/domain.h b/src/Functions/URL/domain.h index 936fb9d5f00..328df76b570 100644 --- a/src/Functions/URL/domain.h +++ b/src/Functions/URL/domain.h @@ -1,9 +1,10 @@ #pragma once -#include "protocol.h" -#include -#include #include +#include +#include + +#include namespace DB { diff --git a/src/Functions/URL/domainWithoutWWW.cpp b/src/Functions/URL/domainWithoutWWW.cpp index f6c8b5c84fc..436aad01b82 100644 --- a/src/Functions/URL/domainWithoutWWW.cpp +++ b/src/Functions/URL/domainWithoutWWW.cpp @@ -1,6 +1,6 @@ #include #include -#include "domain.h" +#include namespace DB { diff --git a/src/Functions/URL/firstSignificantSubdomain.cpp b/src/Functions/URL/firstSignificantSubdomain.cpp index b04f6d882ef..e929aefbc27 100644 --- a/src/Functions/URL/firstSignificantSubdomain.cpp +++ b/src/Functions/URL/firstSignificantSubdomain.cpp @@ -1,6 +1,6 @@ #include #include -#include "ExtractFirstSignificantSubdomain.h" +#include namespace DB diff --git a/src/Functions/URL/firstSignificantSubdomainCustom.cpp b/src/Functions/URL/firstSignificantSubdomainCustom.cpp index c07aa2b3ac8..95e5142667b 100644 --- a/src/Functions/URL/firstSignificantSubdomainCustom.cpp +++ b/src/Functions/URL/firstSignificantSubdomainCustom.cpp @@ -1,6 +1,6 @@ #include -#include "ExtractFirstSignificantSubdomain.h" -#include "FirstSignificantSubdomainCustomImpl.h" +#include +#include namespace DB diff --git a/src/Functions/URL/fragment.cpp b/src/Functions/URL/fragment.cpp index 262c1a4c7a6..66da5529d84 100644 --- a/src/Functions/URL/fragment.cpp +++ b/src/Functions/URL/fragment.cpp @@ -1,6 +1,6 @@ #include #include -#include "fragment.h" +#include namespace DB { diff --git a/src/Functions/URL/path.cpp b/src/Functions/URL/path.cpp index 8d609f43191..6de8e4fbf95 100644 --- a/src/Functions/URL/path.cpp +++ b/src/Functions/URL/path.cpp @@ -1,7 +1,7 @@ #include #include #include -#include "path.h" +#include #include diff --git a/src/Functions/URL/pathFull.cpp b/src/Functions/URL/pathFull.cpp index 9aacee21fed..deea617eb29 100644 --- a/src/Functions/URL/pathFull.cpp +++ b/src/Functions/URL/pathFull.cpp @@ -1,7 +1,7 @@ #include #include #include -#include "path.h" +#include #include namespace DB diff --git a/src/Functions/URL/port.cpp b/src/Functions/URL/port.cpp index c8f50f10a56..7492ebcb4e9 100644 --- a/src/Functions/URL/port.cpp +++ b/src/Functions/URL/port.cpp @@ -5,7 +5,7 @@ #include #include #include -#include "domain.h" +#include namespace DB @@ -46,7 +46,7 @@ struct FunctionPortImpl : public IFunction } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { UInt16 default_port = 0; if (arguments.size() == 2) @@ -64,7 +64,7 @@ struct FunctionPortImpl : public IFunction typename ColumnVector::Container & vec_res = col_res->getData(); vec_res.resize(url_column->size()); - vector(default_port, url_strs->getChars(), url_strs->getOffsets(), vec_res); + vector(default_port, url_strs->getChars(), url_strs->getOffsets(), vec_res, input_rows_count); return col_res; } else @@ -73,12 +73,10 @@ struct FunctionPortImpl : public IFunction } private: - static void vector(UInt16 default_port, const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res) + static void vector(UInt16 default_port, const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res, size_t input_rows_count) { - size_t size = offsets.size(); - ColumnString::Offset prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { res[i] = extractPort(default_port, data, prev_offset, offsets[i] - prev_offset - 1); prev_offset = offsets[i]; diff --git a/src/Functions/URL/protocol.cpp b/src/Functions/URL/protocol.cpp index 6bed71207f6..1ac395dc554 100644 --- a/src/Functions/URL/protocol.cpp +++ b/src/Functions/URL/protocol.cpp @@ -1,6 +1,6 @@ #include #include -#include "protocol.h" +#include namespace DB diff --git a/src/Functions/URL/queryString.cpp b/src/Functions/URL/queryString.cpp index 7069ce46b38..f6aaf40fc96 100644 --- a/src/Functions/URL/queryString.cpp +++ b/src/Functions/URL/queryString.cpp @@ -1,6 +1,6 @@ #include #include -#include "queryString.h" +#include namespace DB { diff --git a/src/Functions/URL/queryStringAndFragment.cpp b/src/Functions/URL/queryStringAndFragment.cpp index 367a95acbdc..94f1dfa41e3 100644 --- a/src/Functions/URL/queryStringAndFragment.cpp +++ b/src/Functions/URL/queryStringAndFragment.cpp @@ -1,6 +1,6 @@ #include #include -#include "queryStringAndFragment.h" +#include namespace DB { diff --git a/src/Functions/URL/topLevelDomain.cpp b/src/Functions/URL/topLevelDomain.cpp index 25e9f383f60..b3e88832350 100644 --- a/src/Functions/URL/topLevelDomain.cpp +++ b/src/Functions/URL/topLevelDomain.cpp @@ -1,6 +1,6 @@ #include #include -#include "domain.h" +#include namespace DB { diff --git a/src/Functions/UTCTimestampTransform.cpp b/src/Functions/UTCTimestampTransform.cpp index 36ec520068f..35015188078 100644 --- a/src/Functions/UTCTimestampTransform.cpp +++ b/src/Functions/UTCTimestampTransform.cpp @@ -67,7 +67,7 @@ namespace return date_time_type; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { if (arguments.size() != 2) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {}'s arguments number must be 2.", name); @@ -81,11 +81,10 @@ namespace if (WhichDataType(arg1.type).isDateTime()) { const auto & date_time_col = checkAndGetColumn(*arg1.column); - size_t col_size = date_time_col.size(); using ColVecTo = DataTypeDateTime::ColumnType; - typename ColVecTo::MutablePtr result_column = ColVecTo::create(col_size); + typename ColVecTo::MutablePtr result_column = ColVecTo::create(input_rows_count); typename ColVecTo::Container & result_data = result_column->getData(); - for (size_t i = 0; i < col_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { UInt32 date_time_val = date_time_col.getElement(i); LocalDateTime date_time(date_time_val, Name::to ? utc_time_zone : DateLUT::instance(time_zone_val)); @@ -97,14 +96,13 @@ namespace else if (WhichDataType(arg1.type).isDateTime64()) { const auto & date_time_col = checkAndGetColumn(*arg1.column); - size_t col_size = date_time_col.size(); const DataTypeDateTime64 * date_time_type = static_cast(arg1.type.get()); UInt32 col_scale = date_time_type->getScale(); Int64 scale_multiplier = DecimalUtils::scaleMultiplier(col_scale); using ColDecimalTo = DataTypeDateTime64::ColumnType; - typename ColDecimalTo::MutablePtr result_column = ColDecimalTo::create(col_size, col_scale); + typename ColDecimalTo::MutablePtr result_column = ColDecimalTo::create(input_rows_count, col_scale); typename ColDecimalTo::Container & result_data = result_column->getData(); - for (size_t i = 0; i < col_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { DateTime64 date_time_val = date_time_col.getElement(i); Int64 seconds = date_time_val.value / scale_multiplier; diff --git a/src/Functions/acosh.cpp b/src/Functions/acosh.cpp index 5b071da9c40..2bab84c77af 100644 --- a/src/Functions/acosh.cpp +++ b/src/Functions/acosh.cpp @@ -5,11 +5,12 @@ namespace DB { namespace { - struct AcoshName - { - static constexpr auto name = "acosh"; - }; - using FunctionAcosh = FunctionMathUnary>; + +struct AcoshName +{ + static constexpr auto name = "acosh"; +}; +using FunctionAcosh = FunctionMathUnary>; } diff --git a/src/Functions/addMicroseconds.cpp b/src/Functions/addMicroseconds.cpp index 0dcd6b4452f..8c0ae06dcd0 100644 --- a/src/Functions/addMicroseconds.cpp +++ b/src/Functions/addMicroseconds.cpp @@ -6,6 +6,7 @@ namespace DB { using FunctionAddMicroseconds = FunctionDateOrDateTimeAddInterval; + REGISTER_FUNCTION(AddMicroseconds) { factory.registerFunction(); diff --git a/src/Functions/addMilliseconds.cpp b/src/Functions/addMilliseconds.cpp index 0e2b696d367..83e1f96ec4b 100644 --- a/src/Functions/addMilliseconds.cpp +++ b/src/Functions/addMilliseconds.cpp @@ -6,6 +6,7 @@ namespace DB { using FunctionAddMilliseconds = FunctionDateOrDateTimeAddInterval; + REGISTER_FUNCTION(AddMilliseconds) { factory.registerFunction(); diff --git a/src/Functions/addNanoseconds.cpp b/src/Functions/addNanoseconds.cpp index 93eadc814d9..8f9a54752b9 100644 --- a/src/Functions/addNanoseconds.cpp +++ b/src/Functions/addNanoseconds.cpp @@ -6,6 +6,7 @@ namespace DB { using FunctionAddNanoseconds = FunctionDateOrDateTimeAddInterval; + REGISTER_FUNCTION(AddNanoseconds) { factory.registerFunction(); diff --git a/src/Functions/aes_encrypt_mysql.cpp b/src/Functions/aes_encrypt_mysql.cpp index fb120151c25..33733f92b27 100644 --- a/src/Functions/aes_encrypt_mysql.cpp +++ b/src/Functions/aes_encrypt_mysql.cpp @@ -7,7 +7,6 @@ namespace DB { - namespace { diff --git a/src/Functions/appendTrailingCharIfAbsent.cpp b/src/Functions/appendTrailingCharIfAbsent.cpp index a5554171aaa..0e57d5c55ce 100644 --- a/src/Functions/appendTrailingCharIfAbsent.cpp +++ b/src/Functions/appendTrailingCharIfAbsent.cpp @@ -57,7 +57,7 @@ private: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const auto & column = arguments[0].column; const auto & column_char = arguments[1].column; @@ -80,14 +80,13 @@ private: auto & dst_data = col_res->getChars(); auto & dst_offsets = col_res->getOffsets(); - const auto size = src_offsets.size(); - dst_data.resize(src_data.size() + size); - dst_offsets.resize(size); + dst_data.resize(src_data.size() + input_rows_count); + dst_offsets.resize(input_rows_count); ColumnString::Offset src_offset{}; ColumnString::Offset dst_offset{}; - for (const auto i : collections::range(0, size)) + for (size_t i = 0; i < input_rows_count; ++i) { const auto src_length = src_offsets[i] - src_offset; memcpySmallAllowReadWriteOverflow15(&dst_data[dst_offset], &src_data[src_offset], src_length); diff --git a/src/Functions/array/arrayAUC.cpp b/src/Functions/array/arrayAUC.cpp index 3e2a3bf6863..7a61c9d368f 100644 --- a/src/Functions/array/arrayAUC.cpp +++ b/src/Functions/array/arrayAUC.cpp @@ -143,13 +143,13 @@ private: const IColumn & scores, const IColumn & labels, const ColumnArray::Offsets & offsets, - PaddedPODArray & result) + PaddedPODArray & result, + size_t input_rows_count) { - size_t size = offsets.size(); - result.resize(size); + result.resize(input_rows_count); ColumnArray::Offset current_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { auto next_offset = offsets[i]; result[i] = apply(scores, labels, current_offset, next_offset); @@ -179,7 +179,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { ColumnPtr col1 = arguments[0].column->convertToFullColumnIfConst(); ColumnPtr col2 = arguments[1].column->convertToFullColumnIfConst(); @@ -203,7 +203,8 @@ public: col_array1->getData(), col_array2->getData(), col_array1->getOffsets(), - col_res->getData()); + col_res->getData(), + input_rows_count); return col_res; } diff --git a/src/Functions/array/arrayConcat.cpp b/src/Functions/array/arrayConcat.cpp index cdb361b73b9..768877bac99 100644 --- a/src/Functions/array/arrayConcat.cpp +++ b/src/Functions/array/arrayConcat.cpp @@ -40,7 +40,6 @@ ColumnPtr FunctionArrayConcat::executeImpl(const ColumnsWithTypeAndName & argume if (result_type->onlyNull()) return result_type->createColumnConstWithDefaultValue(input_rows_count); - size_t rows = input_rows_count; size_t num_args = arguments.size(); Columns preprocessed_columns(num_args); @@ -69,7 +68,7 @@ ColumnPtr FunctionArrayConcat::executeImpl(const ColumnsWithTypeAndName & argume } if (const auto * argument_column_array = typeid_cast(argument_column.get())) - sources.emplace_back(GatherUtils::createArraySource(*argument_column_array, is_const, rows)); + sources.emplace_back(GatherUtils::createArraySource(*argument_column_array, is_const, input_rows_count)); else throw Exception(ErrorCodes::LOGICAL_ERROR, "Arguments for function {} must be arrays.", getName()); } diff --git a/src/Functions/array/arrayEnumerateRanked.h b/src/Functions/array/arrayEnumerateRanked.h index ad325fe542a..269a7db6e92 100644 --- a/src/Functions/array/arrayEnumerateRanked.h +++ b/src/Functions/array/arrayEnumerateRanked.h @@ -132,7 +132,7 @@ private: /// Hash a set of keys into a UInt128 value. -static inline UInt128 ALWAYS_INLINE hash128depths(const std::vector & indices, const ColumnRawPtrs & key_columns) +static UInt128 hash128depths(const std::vector & indices, const ColumnRawPtrs & key_columns) { SipHash hash; for (size_t j = 0, keys_size = key_columns.size(); j < keys_size; ++j) diff --git a/src/Functions/array/length.cpp b/src/Functions/array/length.cpp index d81c071b55e..760506194fa 100644 --- a/src/Functions/array/length.cpp +++ b/src/Functions/array/length.cpp @@ -16,40 +16,38 @@ struct LengthImpl { static constexpr auto is_fixed_to_constant = true; - static void vector(const ColumnString::Chars & /*data*/, const ColumnString::Offsets & offsets, PaddedPODArray & res) + static void vector(const ColumnString::Chars & /*data*/, const ColumnString::Offsets & offsets, PaddedPODArray & res, size_t input_rows_count) { - size_t size = offsets.size(); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) res[i] = offsets[i] - 1 - offsets[i - 1]; } - static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t n, UInt64 & res) + static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t n, UInt64 & res, size_t) { res = n; } - static void vectorFixedToVector(const ColumnString::Chars & /*data*/, size_t /*n*/, PaddedPODArray & /*res*/) + static void vectorFixedToVector(const ColumnString::Chars & /*data*/, size_t /*n*/, PaddedPODArray & /*res*/, size_t) { } - static void array(const ColumnString::Offsets & offsets, PaddedPODArray & res) + static void array(const ColumnString::Offsets & offsets, PaddedPODArray & res, size_t input_rows_count) { - size_t size = offsets.size(); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) res[i] = offsets[i] - offsets[i - 1]; } - [[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray &) + [[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function length to UUID argument"); } - [[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray &) + [[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function length to IPv6 argument"); } - [[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray &) + [[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function length to IPv4 argument"); } diff --git a/src/Functions/ascii.cpp b/src/Functions/ascii.cpp index 7c8158b53d4..e65996a90e1 100644 --- a/src/Functions/ascii.cpp +++ b/src/Functions/ascii.cpp @@ -23,49 +23,43 @@ struct AsciiImpl using ReturnType = Int32; - static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res) + static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res, size_t input_rows_count) { - size_t size = offsets.size(); - ColumnString::Offset prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { res[i] = doAscii(data, prev_offset, offsets[i] - prev_offset - 1); prev_offset = offsets[i]; } } - [[noreturn]] static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, Int32 & /*res*/) + [[noreturn]] static void vectorFixedToConstant(const ColumnString::Chars &, size_t, Int32 &, size_t) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "vectorFixedToConstant not implemented for function {}", AsciiName::name); } - static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res) + static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res, size_t input_rows_count) { - size_t size = data.size() / n; - - for (size_t i = 0; i < size; ++i) - { + for (size_t i = 0; i < input_rows_count; ++i) res[i] = doAscii(data, i * n, n); - } } - [[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray & /*res*/) + [[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to Array argument", AsciiName::name); } - [[noreturn]] static void uuid(const ColumnUUID::Container & /*offsets*/, size_t /*n*/, PaddedPODArray & /*res*/) + [[noreturn]] static void uuid(const ColumnUUID::Container &, size_t, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to UUID argument", AsciiName::name); } - [[noreturn]] static void ipv6(const ColumnIPv6::Container & /*offsets*/, size_t /*n*/, PaddedPODArray & /*res*/) + [[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to IPv6 argument", AsciiName::name); } - [[noreturn]] static void ipv4(const ColumnIPv4::Container & /*offsets*/, size_t /*n*/, PaddedPODArray & /*res*/) + [[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to IPv4 argument", AsciiName::name); } diff --git a/src/Functions/asinh.cpp b/src/Functions/asinh.cpp index 6af832ae07c..b5e3626148f 100644 --- a/src/Functions/asinh.cpp +++ b/src/Functions/asinh.cpp @@ -5,11 +5,12 @@ namespace DB { namespace { - struct AsinhName - { - static constexpr auto name = "asinh"; - }; - using FunctionAsinh = FunctionMathUnary>; + +struct AsinhName +{ + static constexpr auto name = "asinh"; +}; +using FunctionAsinh = FunctionMathUnary>; } diff --git a/src/Functions/atan2.cpp b/src/Functions/atan2.cpp index 42294e11458..218f4c5406f 100644 --- a/src/Functions/atan2.cpp +++ b/src/Functions/atan2.cpp @@ -5,11 +5,12 @@ namespace DB { namespace { - struct Atan2Name - { - static constexpr auto name = "atan2"; - }; - using FunctionAtan2 = FunctionMathBinaryFloat64>; + +struct Atan2Name +{ + static constexpr auto name = "atan2"; +}; +using FunctionAtan2 = FunctionMathBinaryFloat64>; } diff --git a/src/Functions/atanh.cpp b/src/Functions/atanh.cpp index fab25414725..a36f5bcbcf0 100644 --- a/src/Functions/atanh.cpp +++ b/src/Functions/atanh.cpp @@ -5,11 +5,12 @@ namespace DB { namespace { - struct AtanhName - { - static constexpr auto name = "atanh"; - }; - using FunctionAtanh = FunctionMathUnary>; + +struct AtanhName +{ + static constexpr auto name = "atanh"; +}; +using FunctionAtanh = FunctionMathUnary>; } diff --git a/src/Functions/base58Encode.cpp b/src/Functions/base58Encode.cpp index cf790ebddab..3ae2fb12c5e 100644 --- a/src/Functions/base58Encode.cpp +++ b/src/Functions/base58Encode.cpp @@ -3,8 +3,10 @@ namespace DB { + REGISTER_FUNCTION(Base58Encode) { factory.registerFunction>(); } + } diff --git a/src/Functions/base64Decode.cpp b/src/Functions/base64Decode.cpp index 4d06ac99d6f..349475af3f0 100644 --- a/src/Functions/base64Decode.cpp +++ b/src/Functions/base64Decode.cpp @@ -5,6 +5,7 @@ namespace DB { + REGISTER_FUNCTION(Base64Decode) { FunctionDocumentation::Description description = R"(Accepts a String and decodes it from base64, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-4). Throws an exception in case of an error. Alias: FROM_BASE64.)"; @@ -19,6 +20,7 @@ REGISTER_FUNCTION(Base64Decode) /// MySQL compatibility alias. factory.registerAlias("FROM_BASE64", "base64Decode", FunctionFactory::Case::Insensitive); } + } #endif diff --git a/src/Functions/base64Encode.cpp b/src/Functions/base64Encode.cpp index 64142995552..fe0fa642599 100644 --- a/src/Functions/base64Encode.cpp +++ b/src/Functions/base64Encode.cpp @@ -5,6 +5,7 @@ namespace DB { + REGISTER_FUNCTION(Base64Encode) { FunctionDocumentation::Description description = R"(Encodes a String as base64, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-4). Alias: TO_BASE64.)"; @@ -19,6 +20,7 @@ REGISTER_FUNCTION(Base64Encode) /// MySQL compatibility alias. factory.registerAlias("TO_BASE64", "base64Encode", FunctionFactory::Case::Insensitive); } + } #endif diff --git a/src/Functions/base64URLDecode.cpp b/src/Functions/base64URLDecode.cpp index f5766dc60bd..f256e111619 100644 --- a/src/Functions/base64URLDecode.cpp +++ b/src/Functions/base64URLDecode.cpp @@ -5,6 +5,7 @@ namespace DB { + REGISTER_FUNCTION(Base64URLDecode) { FunctionDocumentation::Description description = R"(Accepts a base64-encoded URL and decodes it from base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)"; @@ -16,6 +17,7 @@ REGISTER_FUNCTION(Base64URLDecode) factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); } + } #endif diff --git a/src/Functions/base64URLEncode.cpp b/src/Functions/base64URLEncode.cpp index 73a465a30c5..215712f7586 100644 --- a/src/Functions/base64URLEncode.cpp +++ b/src/Functions/base64URLEncode.cpp @@ -5,6 +5,7 @@ namespace DB { + REGISTER_FUNCTION(Base64URLEncode) { FunctionDocumentation::Description description = R"(Encodes an URL (String or FixedString) as base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)"; @@ -16,6 +17,7 @@ REGISTER_FUNCTION(Base64URLEncode) factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); } + } #endif diff --git a/src/Functions/byteSize.cpp b/src/Functions/byteSize.cpp index 93a3a86641a..d366a1b2e12 100644 --- a/src/Functions/byteSize.cpp +++ b/src/Functions/byteSize.cpp @@ -67,11 +67,11 @@ public: const IColumn * column = arguments[arg_num].column.get(); if (arg_num == 0) - for (size_t row_num = 0; row_num < input_rows_count; ++row_num) - vec_res[row_num] = column->byteSizeAt(row_num); + for (size_t row = 0; row < input_rows_count; ++row) + vec_res[row] = column->byteSizeAt(row); else - for (size_t row_num = 0; row_num < input_rows_count; ++row_num) - vec_res[row_num] += column->byteSizeAt(row_num); + for (size_t row = 0; row < input_rows_count; ++row) + vec_res[row] += column->byteSizeAt(row); } return result_col; diff --git a/src/Functions/byteSwap.cpp b/src/Functions/byteSwap.cpp index 6c824b851b0..2094ec4fa1a 100644 --- a/src/Functions/byteSwap.cpp +++ b/src/Functions/byteSwap.cpp @@ -10,6 +10,7 @@ extern const int NOT_IMPLEMENTED; namespace { + template requires std::is_integral_v T byteSwap(T x) diff --git a/src/Functions/caseWithExpression.cpp b/src/Functions/caseWithExpression.cpp index 71fccc8436e..f0a620489ef 100644 --- a/src/Functions/caseWithExpression.cpp +++ b/src/Functions/caseWithExpression.cpp @@ -98,8 +98,7 @@ public: /// Execute transform. ColumnsWithTypeAndName transform_args{args.front(), src_array_col, dst_array_col, args.back()}; - return FunctionFactory::instance().get("transform", context)->build(transform_args) - ->execute(transform_args, result_type, input_rows_count); + return FunctionFactory::instance().get("transform", context)->build(transform_args)->execute(transform_args, result_type, input_rows_count); } private: diff --git a/src/Functions/convertCharset.cpp b/src/Functions/convertCharset.cpp index b3b7394acb9..d998e88e7c2 100644 --- a/src/Functions/convertCharset.cpp +++ b/src/Functions/convertCharset.cpp @@ -88,7 +88,8 @@ private: static void convert(const String & from_charset, const String & to_charset, const ColumnString::Chars & from_chars, const ColumnString::Offsets & from_offsets, - ColumnString::Chars & to_chars, ColumnString::Offsets & to_offsets) + ColumnString::Chars & to_chars, ColumnString::Offsets & to_offsets, + size_t input_rows_count) { auto converter_from = getConverter(from_charset); auto converter_to = getConverter(to_charset); @@ -96,12 +97,11 @@ private: ColumnString::Offset current_from_offset = 0; ColumnString::Offset current_to_offset = 0; - size_t size = from_offsets.size(); - to_offsets.resize(size); + to_offsets.resize(input_rows_count); PODArray uchars; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t from_string_size = from_offsets[i] - current_from_offset - 1; @@ -184,7 +184,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnWithTypeAndName & arg_from = arguments[0]; const ColumnWithTypeAndName & arg_charset_from = arguments[1]; @@ -204,7 +204,7 @@ public: if (const ColumnString * col_from = checkAndGetColumn(arg_from.column.get())) { auto col_to = ColumnString::create(); - convert(charset_from, charset_to, col_from->getChars(), col_from->getOffsets(), col_to->getChars(), col_to->getOffsets()); + convert(charset_from, charset_to, col_from->getChars(), col_from->getOffsets(), col_to->getChars(), col_to->getOffsets(), input_rows_count); return col_to; } else diff --git a/src/Functions/cosh.cpp b/src/Functions/cosh.cpp index 54b52051aab..f4302292303 100644 --- a/src/Functions/cosh.cpp +++ b/src/Functions/cosh.cpp @@ -5,11 +5,12 @@ namespace DB { namespace { - struct CoshName - { - static constexpr auto name = "cosh"; - }; - using FunctionCosh = FunctionMathUnary>; + +struct CoshName +{ + static constexpr auto name = "cosh"; +}; +using FunctionCosh = FunctionMathUnary>; } diff --git a/src/Functions/countSubstringsCaseInsensitiveUTF8.cpp b/src/Functions/countSubstringsCaseInsensitiveUTF8.cpp index 3f71bca63d2..99ae4f1927e 100644 --- a/src/Functions/countSubstringsCaseInsensitiveUTF8.cpp +++ b/src/Functions/countSubstringsCaseInsensitiveUTF8.cpp @@ -13,8 +13,7 @@ struct NameCountSubstringsCaseInsensitiveUTF8 static constexpr auto name = "countSubstringsCaseInsensitiveUTF8"; }; -using FunctionCountSubstringsCaseInsensitiveUTF8 = FunctionsStringSearch< - CountSubstringsImpl>; +using FunctionCountSubstringsCaseInsensitiveUTF8 = FunctionsStringSearch>; } diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index f49e8dee6b7..157068a7c8d 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -26,8 +26,6 @@ namespace DB namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_COLUMN; extern const int BAD_ARGUMENTS; } @@ -45,84 +43,82 @@ public: template void dispatchForColumns( - const IColumn & x, const IColumn & y, + const IColumn & col_x, const IColumn & col_y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { - if (const auto * x_vec_16 = checkAndGetColumn(&x)) - dispatchForSecondColumn(*x_vec_16, y, timezone_x, timezone_y, result); - else if (const auto * x_vec_32 = checkAndGetColumn(&x)) - dispatchForSecondColumn(*x_vec_32, y, timezone_x, timezone_y, result); - else if (const auto * x_vec_32_s = checkAndGetColumn(&x)) - dispatchForSecondColumn(*x_vec_32_s, y, timezone_x, timezone_y, result); - else if (const auto * x_vec_64 = checkAndGetColumn(&x)) - dispatchForSecondColumn(*x_vec_64, y, timezone_x, timezone_y, result); - else if (const auto * x_const_16 = checkAndGetColumnConst(&x)) - dispatchConstForSecondColumn(x_const_16->getValue(), y, timezone_x, timezone_y, result); - else if (const auto * x_const_32 = checkAndGetColumnConst(&x)) - dispatchConstForSecondColumn(x_const_32->getValue(), y, timezone_x, timezone_y, result); - else if (const auto * x_const_32_s = checkAndGetColumnConst(&x)) - dispatchConstForSecondColumn(x_const_32_s->getValue(), y, timezone_x, timezone_y, result); - else if (const auto * x_const_64 = checkAndGetColumnConst(&x)) - dispatchConstForSecondColumn(x_const_64->getValue>(), y, timezone_x, timezone_y, result); + if (const auto * x_vec_16 = checkAndGetColumn(&col_x)) + dispatchForSecondColumn(*x_vec_16, col_y, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * x_vec_32 = checkAndGetColumn(&col_x)) + dispatchForSecondColumn(*x_vec_32, col_y, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * x_vec_32_s = checkAndGetColumn(&col_x)) + dispatchForSecondColumn(*x_vec_32_s, col_y, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * x_vec_64 = checkAndGetColumn(&col_x)) + dispatchForSecondColumn(*x_vec_64, col_y, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * x_const_16 = checkAndGetColumnConst(&col_x)) + dispatchConstForSecondColumn(x_const_16->getValue(), col_y, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * x_const_32 = checkAndGetColumnConst(&col_x)) + dispatchConstForSecondColumn(x_const_32->getValue(), col_y, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * x_const_32_s = checkAndGetColumnConst(&col_x)) + dispatchConstForSecondColumn(x_const_32_s->getValue(), col_y, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * x_const_64 = checkAndGetColumnConst(&col_x)) + dispatchConstForSecondColumn(x_const_64->getValue>(), col_y, timezone_x, timezone_y, input_rows_count, result); else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column for first argument of function {}, must be Date, Date32, DateTime or DateTime64", - name); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for first argument of function {}, must be Date, Date32, DateTime or DateTime64", name); } template void dispatchForSecondColumn( - const LeftColumnType & x, const IColumn & y, + const LeftColumnType & x, const IColumn & col_y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { - if (const auto * y_vec_16 = checkAndGetColumn(&y)) - vectorVector(x, *y_vec_16, timezone_x, timezone_y, result); - else if (const auto * y_vec_32 = checkAndGetColumn(&y)) - vectorVector(x, *y_vec_32, timezone_x, timezone_y, result); - else if (const auto * y_vec_32_s = checkAndGetColumn(&y)) - vectorVector(x, *y_vec_32_s, timezone_x, timezone_y, result); - else if (const auto * y_vec_64 = checkAndGetColumn(&y)) - vectorVector(x, *y_vec_64, timezone_x, timezone_y, result); - else if (const auto * y_const_16 = checkAndGetColumnConst(&y)) - vectorConstant(x, y_const_16->getValue(), timezone_x, timezone_y, result); - else if (const auto * y_const_32 = checkAndGetColumnConst(&y)) - vectorConstant(x, y_const_32->getValue(), timezone_x, timezone_y, result); - else if (const auto * y_const_32_s = checkAndGetColumnConst(&y)) - vectorConstant(x, y_const_32_s->getValue(), timezone_x, timezone_y, result); - else if (const auto * y_const_64 = checkAndGetColumnConst(&y)) - vectorConstant(x, y_const_64->getValue>(), timezone_x, timezone_y, result); + if (const auto * y_vec_16 = checkAndGetColumn(&col_y)) + vectorVector(x, *y_vec_16, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_vec_32 = checkAndGetColumn(&col_y)) + vectorVector(x, *y_vec_32, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_vec_32_s = checkAndGetColumn(&col_y)) + vectorVector(x, *y_vec_32_s, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_vec_64 = checkAndGetColumn(&col_y)) + vectorVector(x, *y_vec_64, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_const_16 = checkAndGetColumnConst(&col_y)) + vectorConstant(x, y_const_16->getValue(), timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_const_32 = checkAndGetColumnConst(&col_y)) + vectorConstant(x, y_const_32->getValue(), timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_const_32_s = checkAndGetColumnConst(&col_y)) + vectorConstant(x, y_const_32_s->getValue(), timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_const_64 = checkAndGetColumnConst(&col_y)) + vectorConstant(x, y_const_64->getValue>(), timezone_x, timezone_y, input_rows_count, result); else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", - name); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", name); } template void dispatchConstForSecondColumn( - T1 x, const IColumn & y, + T1 x, const IColumn & col_y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { - if (const auto * y_vec_16 = checkAndGetColumn(&y)) - constantVector(x, *y_vec_16, timezone_x, timezone_y, result); - else if (const auto * y_vec_32 = checkAndGetColumn(&y)) - constantVector(x, *y_vec_32, timezone_x, timezone_y, result); - else if (const auto * y_vec_32_s = checkAndGetColumn(&y)) - constantVector(x, *y_vec_32_s, timezone_x, timezone_y, result); - else if (const auto * y_vec_64 = checkAndGetColumn(&y)) - constantVector(x, *y_vec_64, timezone_x, timezone_y, result); + if (const auto * y_vec_16 = checkAndGetColumn(&col_y)) + constantVector(x, *y_vec_16, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_vec_32 = checkAndGetColumn(&col_y)) + constantVector(x, *y_vec_32, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_vec_32_s = checkAndGetColumn(&col_y)) + constantVector(x, *y_vec_32_s, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_vec_64 = checkAndGetColumn(&col_y)) + constantVector(x, *y_vec_64, timezone_x, timezone_y, input_rows_count, result); else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", - name); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", name); } template void vectorVector( const LeftColumnType & x, const RightColumnType & y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { const auto & x_data = x.getData(); @@ -130,14 +126,15 @@ public: const auto transform_x = TransformDateTime64(getScale(x)); const auto transform_y = TransformDateTime64(getScale(y)); - for (size_t i = 0, size = x.size(); i < size; ++i) - result[i] = calculate(transform_x, transform_y, x_data[i], y_data[i], timezone_x, timezone_y); + for (size_t i = 0; i < input_rows_count; ++i) + result[i] = calculate(transform_x, transform_y, x_data[i], y_data[i], timezone_x, timezone_y); } template void vectorConstant( const LeftColumnType & x, T2 y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { const auto & x_data = x.getData(); @@ -145,7 +142,7 @@ public: const auto transform_y = TransformDateTime64(getScale(y)); const auto y_value = stripDecimalFieldValue(y); - for (size_t i = 0, size = x.size(); i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) result[i] = calculate(transform_x, transform_y, x_data[i], y_value, timezone_x, timezone_y); } @@ -153,6 +150,7 @@ public: void constantVector( T1 x, const RightColumnType & y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { const auto & y_data = y.getData(); @@ -160,20 +158,22 @@ public: const auto transform_y = TransformDateTime64(getScale(y)); const auto x_value = stripDecimalFieldValue(x); - for (size_t i = 0, size = y.size(); i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) result[i] = calculate(transform_x, transform_y, x_value, y_data[i], timezone_x, timezone_y); } template Int64 calculate(const TransformX & transform_x, const TransformY & transform_y, T1 x, T2 y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y) const { + auto res = static_cast(transform_y.execute(y, timezone_y)) - static_cast(transform_x.execute(x, timezone_x)); + if constexpr (is_diff) - return static_cast(transform_y.execute(y, timezone_y)) - - static_cast(transform_x.execute(x, timezone_x)); + { + return res; + } else { - auto res = static_cast(transform_y.execute(y, timezone_y)) - - static_cast(transform_x.execute(x, timezone_x)); + /// Adjust res: DateTimeComponentsWithFractionalPart a_comp; DateTimeComponentsWithFractionalPart b_comp; Int64 adjust_value; @@ -332,95 +332,73 @@ public: static constexpr auto name = is_relative ? "dateDiff" : "age"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override - { - return name; - } + String getName() const override { return name; } bool isVariadic() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } size_t getNumberOfArguments() const override { return 0; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 3}; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() != 3 && arguments.size() != 4) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 3 or 4", - getName(), arguments.size()); + FunctionArgumentDescriptors mandatory_args{ + {"unit", static_cast(&isString), nullptr, "String"}, + {"startdate", static_cast(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"}, + {"enddate", static_cast(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"}, + }; - if (!isString(arguments[0])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "First argument for function {} (unit) must be String", - getName()); + FunctionArgumentDescriptors optional_args{ + {"timezone", static_cast(&isString), nullptr, "String"}, + }; - if (!isDate(arguments[1]) && !isDate32(arguments[1]) && !isDateTime(arguments[1]) && !isDateTime64(arguments[1])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Second argument for function {} must be Date, Date32, DateTime or DateTime64", - getName()); - - if (!isDate(arguments[2]) && !isDate32(arguments[2]) && !isDateTime(arguments[2]) && !isDateTime64(arguments[2])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Third argument for function {} must be Date, Date32, DateTime or DateTime64", - getName() - ); - - if (arguments.size() == 4 && !isString(arguments[3])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Fourth argument for function {} (timezone) must be String", - getName()); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } - bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 3}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto * unit_column = checkAndGetColumnConst(arguments[0].column.get()); - if (!unit_column) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "First argument for function {} must be constant String", - getName()); + const auto * col_unit = checkAndGetColumnConst(arguments[0].column.get()); + if (!col_unit) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument for function {} must be constant String", getName()); - String unit = Poco::toLower(unit_column->getValue()); + String unit = Poco::toLower(col_unit->getValue()); - const IColumn & x = *arguments[1].column; - const IColumn & y = *arguments[2].column; + const IColumn & col_x = *arguments[1].column; + const IColumn & col_y = *arguments[2].column; - size_t rows = input_rows_count; - auto res = ColumnInt64::create(rows); + auto col_res = ColumnInt64::create(input_rows_count); const auto & timezone_x = extractTimeZoneFromFunctionArguments(arguments, 3, 1); const auto & timezone_y = extractTimeZoneFromFunctionArguments(arguments, 3, 2); if (unit == "year" || unit == "years" || unit == "yy" || unit == "yyyy") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "quarter" || unit == "quarters" || unit == "qq" || unit == "q") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "month" || unit == "months" || unit == "mm" || unit == "m") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "week" || unit == "weeks" || unit == "wk" || unit == "ww") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "day" || unit == "days" || unit == "dd" || unit == "d") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "hour" || unit == "hours" || unit == "hh" || unit == "h") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "minute" || unit == "minutes" || unit == "mi" || unit == "n") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "second" || unit == "seconds" || unit == "ss" || unit == "s") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "millisecond" || unit == "milliseconds" || unit == "ms") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "microsecond" || unit == "microseconds" || unit == "us" || unit == "u") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "nanosecond" || unit == "nanoseconds" || unit == "ns") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Function {} does not support '{}' unit", getName(), unit); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} does not support '{}' unit", getName(), unit); - return res; + return col_res; } private: DateDiffImpl impl{name}; @@ -437,50 +415,35 @@ public: static constexpr auto name = "timeDiff"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override - { - return name; - } - + String getName() const override { return name; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; } bool isVariadic() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } size_t getNumberOfArguments() const override { return 2; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() != 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 2", - getName(), arguments.size()); + FunctionArgumentDescriptors args{ + {"first_datetime", static_cast(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"}, + {"second_datetime", static_cast(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"}, + }; - if (!isDate(arguments[0]) && !isDate32(arguments[0]) && !isDateTime(arguments[0]) && !isDateTime64(arguments[0])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "First argument for function {} must be Date, Date32, DateTime or DateTime64", - getName()); - - if (!isDate(arguments[1]) && !isDate32(arguments[1]) && !isDateTime(arguments[1]) && !isDateTime64(arguments[1])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Second argument for function {} must be Date, Date32, DateTime or DateTime64", - getName() - ); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } - bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const IColumn & x = *arguments[0].column; - const IColumn & y = *arguments[1].column; + const IColumn & col_x = *arguments[0].column; + const IColumn & col_y = *arguments[1].column; - size_t rows = input_rows_count; - auto res = ColumnInt64::create(rows); + auto col_res = ColumnInt64::create(input_rows_count); - impl.dispatchForColumns>(x, y, DateLUT::instance(), DateLUT::instance(), res->getData()); + impl.dispatchForColumns>(col_x, col_y, DateLUT::instance(), DateLUT::instance(), input_rows_count, col_res->getData()); - return res; + return col_res; } private: DateDiffImpl impl{name}; diff --git a/src/Functions/dateName.cpp b/src/Functions/dateName.cpp index 8165ea1b8d3..846cb87f1ee 100644 --- a/src/Functions/dateName.cpp +++ b/src/Functions/dateName.cpp @@ -109,14 +109,14 @@ public: ColumnPtr executeImpl( const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, - [[maybe_unused]] size_t input_rows_count) const override + size_t input_rows_count) const override { ColumnPtr res; - if (!((res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)))) + if (!((res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)))) throw Exception( ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of function {}, must be Date or DateTime.", @@ -127,7 +127,7 @@ public: } template - ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const + ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const { auto * times = checkAndGetColumn(arguments[1].column.get()); if (!times) @@ -144,7 +144,7 @@ public: String date_part = date_part_column->getValue(); const DateLUTImpl * time_zone_tmp; - if (std::is_same_v || std::is_same_v) + if constexpr (std::is_same_v || std::is_same_v) time_zone_tmp = &extractTimeZoneFromFunctionArguments(arguments, 2, 1); else time_zone_tmp = &DateLUT::instance(); @@ -175,7 +175,7 @@ public: using TimeType = DateTypeToTimeType; callOnDatePartWriter(date_part, [&](const auto & writer) { - for (size_t i = 0; i < times_data.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { if constexpr (std::is_same_v) { diff --git a/src/Functions/decodeHTMLComponent.cpp b/src/Functions/decodeHTMLComponent.cpp index 00a601b77a6..d36bee534a8 100644 --- a/src/Functions/decodeHTMLComponent.cpp +++ b/src/Functions/decodeHTMLComponent.cpp @@ -28,20 +28,20 @@ namespace const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { /// The size of result is always not more than the size of source. /// Because entities decodes to the shorter byte sequence. /// Example: &#xx... &#xx... will decode to UTF-8 byte sequence not longer than 4 bytes. res_data.resize(data.size()); - size_t size = offsets.size(); - res_offsets.resize(size); + res_offsets.resize(input_rows_count); size_t prev_offset = 0; size_t res_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const char * src_data = reinterpret_cast(&data[prev_offset]); size_t src_size = offsets[i] - prev_offset; @@ -55,7 +55,7 @@ namespace res_data.resize(res_offset); } - [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function decodeHTMLComponent cannot work with FixedString argument"); } @@ -64,7 +64,6 @@ namespace static const int max_legal_unicode_value = 0x10FFFF; static const int max_decimal_length_of_unicode_point = 7; /// 1114111 - static size_t execute(const char * src, size_t src_size, char * dst) { const char * src_pos = src; diff --git a/src/Functions/decodeXMLComponent.cpp b/src/Functions/decodeXMLComponent.cpp index cbbe46fcb8c..8743aa4759d 100644 --- a/src/Functions/decodeXMLComponent.cpp +++ b/src/Functions/decodeXMLComponent.cpp @@ -27,20 +27,20 @@ namespace const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { /// The size of result is always not more than the size of source. /// Because entities decodes to the shorter byte sequence. /// Example: &#xx... &#xx... will decode to UTF-8 byte sequence not longer than 4 bytes. res_data.resize(data.size()); - size_t size = offsets.size(); - res_offsets.resize(size); + res_offsets.resize(input_rows_count); size_t prev_offset = 0; size_t res_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const char * src_data = reinterpret_cast(&data[prev_offset]); size_t src_size = offsets[i] - prev_offset; @@ -54,7 +54,7 @@ namespace res_data.resize(res_offset); } - [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function decodeXMLComponent cannot work with FixedString argument"); } diff --git a/src/Functions/degrees.cpp b/src/Functions/degrees.cpp index 8646eb54d9a..94b5ce3682c 100644 --- a/src/Functions/degrees.cpp +++ b/src/Functions/degrees.cpp @@ -7,18 +7,20 @@ namespace DB { namespace { - struct DegreesName - { - static constexpr auto name = "degrees"; - }; - Float64 degrees(Float64 r) - { - Float64 degrees = r * (180 / M_PI); - return degrees; - } +struct DegreesName +{ + static constexpr auto name = "degrees"; +}; + +Float64 degrees(Float64 r) +{ + Float64 degrees = r * (180 / M_PI); + return degrees; +} + +using FunctionDegrees = FunctionMathUnary>; - using FunctionDegrees = FunctionMathUnary>; } REGISTER_FUNCTION(Degrees) diff --git a/src/Functions/encodeXMLComponent.cpp b/src/Functions/encodeXMLComponent.cpp index 64d85ecaeb8..a22917838b7 100644 --- a/src/Functions/encodeXMLComponent.cpp +++ b/src/Functions/encodeXMLComponent.cpp @@ -25,17 +25,17 @@ namespace const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { /// 6 is the maximum size amplification (the maximum length of encoded entity: ") res_data.resize(data.size() * 6); - size_t size = offsets.size(); - res_offsets.resize(size); + res_offsets.resize(input_rows_count); size_t prev_offset = 0; size_t res_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const char * src_data = reinterpret_cast(&data[prev_offset]); size_t src_size = offsets[i] - prev_offset; @@ -49,7 +49,7 @@ namespace res_data.resize(res_offset); } - [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function encodeXML cannot work with FixedString argument"); } diff --git a/src/Functions/filesystem.cpp b/src/Functions/filesystem.cpp index 9fbf9b0cbe7..9b168f3f088 100644 --- a/src/Functions/filesystem.cpp +++ b/src/Functions/filesystem.cpp @@ -91,7 +91,7 @@ public: auto col_res = ColumnVector::create(col_str->size()); auto & data = col_res->getData(); - for (size_t i = 0; i < col_str->size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { auto disk_name = col_str->getDataAt(i).toString(); if (auto it = disk_map.find(disk_name); it != disk_map.end()) diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index f89afd67e78..f33b7849a43 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -848,7 +848,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, [[maybe_unused]] size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { ColumnPtr res; if constexpr (support_integer == SupportInteger::Yes) @@ -862,17 +862,17 @@ public: if (!castType(arguments[0].type.get(), [&](const auto & type) { using FromDataType = std::decay_t; - if (!(res = executeType(arguments, result_type))) + if (!(res = executeType(arguments, result_type, input_rows_count))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of function {}, must be Integer, Date, Date32, DateTime or DateTime64.", arguments[0].column->getName(), getName()); return true; })) { - if (!((res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)))) + if (!((res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of function {}, must be Integer or DateTime.", arguments[0].column->getName(), getName()); @@ -881,10 +881,10 @@ public: } else { - if (!((res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)))) + if (!((res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of function {}, must be Date or DateTime.", arguments[0].column->getName(), getName()); @@ -894,7 +894,7 @@ public: } template - ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const + ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const { auto non_const_datetime = arguments[0].column->convertToFullColumnIfConst(); auto * times = checkAndGetColumn(non_const_datetime.get()); @@ -955,13 +955,11 @@ public: else time_zone = &DateLUT::instance(); - const auto & vec = times->getData(); - auto col_res = ColumnString::create(); auto & res_data = col_res->getChars(); auto & res_offsets = col_res->getOffsets(); - res_data.resize(vec.size() * (out_template_size + 1)); - res_offsets.resize(vec.size()); + res_data.resize(input_rows_count * (out_template_size + 1)); + res_offsets.resize(input_rows_count); if constexpr (format_syntax == FormatSyntax::MySQL) { @@ -990,9 +988,11 @@ public: } } + const auto & vec = times->getData(); + auto * begin = reinterpret_cast(res_data.data()); auto * pos = begin; - for (size_t i = 0; i < vec.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { if (!const_time_zone_column && arguments.size() > 2) { diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index d10b3f9a5b7..4e3f302ce36 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -75,7 +75,7 @@ public: if (const ColumnString * col_query_string = checkAndGetColumn(col_query.get())) { auto col_res = ColumnString::create(); - formatVector(col_query_string->getChars(), col_query_string->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_null_map); + formatVector(col_query_string->getChars(), col_query_string->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_null_map, input_rows_count); if (error_handling == ErrorHandling::Null) return ColumnNullable::create(std::move(col_res), std::move(col_null_map)); @@ -92,16 +92,16 @@ private: const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets, - ColumnUInt8::MutablePtr & res_null_map) const + ColumnUInt8::MutablePtr & res_null_map, + size_t input_rows_count) const { - const size_t size = offsets.size(); - res_offsets.resize(size); + res_offsets.resize(input_rows_count); res_data.resize(data.size()); size_t prev_offset = 0; size_t res_data_size = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const char * begin = reinterpret_cast(&data[prev_offset]); const char * end = begin + offsets[i] - prev_offset - 1; diff --git a/src/Functions/formatReadable.h b/src/Functions/formatReadable.h index 487ec9d79d0..9161ab43e28 100644 --- a/src/Functions/formatReadable.h +++ b/src/Functions/formatReadable.h @@ -55,19 +55,19 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { ColumnPtr res; - if (!((res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)))) + if (!((res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", arguments[0].column->getName(), getName()); @@ -76,7 +76,7 @@ public: private: template - ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const + ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { if (const ColumnVector * col_from = checkAndGetColumn>(arguments[0].column.get())) { @@ -85,13 +85,12 @@ private: const typename ColumnVector::Container & vec_from = col_from->getData(); ColumnString::Chars & data_to = col_to->getChars(); ColumnString::Offsets & offsets_to = col_to->getOffsets(); - size_t size = vec_from.size(); - data_to.resize(size * 2); - offsets_to.resize(size); + data_to.resize(input_rows_count * 2); + offsets_to.resize(input_rows_count); WriteBufferFromVector buf_to(data_to); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Impl::format(static_cast(vec_from[i]), buf_to); writeChar(0, buf_to); diff --git a/src/Functions/geohashDecode.cpp b/src/Functions/geohashDecode.cpp index 96ad7dacfc4..cace6c09fec 100644 --- a/src/Functions/geohashDecode.cpp +++ b/src/Functions/geohashDecode.cpp @@ -51,21 +51,19 @@ public: } template - bool tryExecute(const IColumn * encoded_column, ColumnPtr & result_column) const + bool tryExecute(const IColumn * encoded_column, ColumnPtr & result_column, size_t input_rows_count) const { const auto * encoded = checkAndGetColumn(encoded_column); if (!encoded) return false; - const size_t count = encoded->size(); - - auto latitude = ColumnFloat64::create(count); - auto longitude = ColumnFloat64::create(count); + auto latitude = ColumnFloat64::create(input_rows_count); + auto longitude = ColumnFloat64::create(input_rows_count); ColumnFloat64::Container & lon_data = longitude->getData(); ColumnFloat64::Container & lat_data = latitude->getData(); - for (size_t i = 0; i < count; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { std::string_view encoded_string = encoded->getDataAt(i).toView(); geohashDecode(encoded_string.data(), encoded_string.size(), &lon_data[i], &lat_data[i]); @@ -79,13 +77,13 @@ public: return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IColumn * encoded = arguments[0].column.get(); ColumnPtr res_column; - if (tryExecute(encoded, res_column) || - tryExecute(encoded, res_column)) + if (tryExecute(encoded, res_column, input_rows_count) || + tryExecute(encoded, res_column, input_rows_count)) return res_column; throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unsupported argument type:{} of argument of function {}", diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp index 034c8188b63..c49acddd81f 100644 --- a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -53,7 +53,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IColumn * longitude = arguments[0].column.get(); const IColumn * latitude = arguments[1].column.get(); @@ -65,26 +65,24 @@ public: precision = arguments[2].column; ColumnPtr res_column; - vector(longitude, latitude, precision.get(), res_column); + vector(longitude, latitude, precision.get(), res_column, input_rows_count); return res_column; } private: - void vector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result) const + void vector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result, size_t input_rows_count) const { auto col_str = ColumnString::create(); ColumnString::Chars & out_vec = col_str->getChars(); ColumnString::Offsets & out_offsets = col_str->getOffsets(); - const size_t size = lat_column->size(); - - out_offsets.resize(size); - out_vec.resize(size * (GEOHASH_MAX_TEXT_LENGTH + 1)); + out_offsets.resize(input_rows_count); + out_vec.resize(input_rows_count * (GEOHASH_MAX_TEXT_LENGTH + 1)); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const Float64 longitude_value = lon_column->getFloat64(i); const Float64 latitude_value = lat_column->getFloat64(i); diff --git a/src/Functions/hilbertEncode.cpp b/src/Functions/hilbertEncode.cpp index 13512d0d36c..a4e3cc59b76 100644 --- a/src/Functions/hilbertEncode.cpp +++ b/src/Functions/hilbertEncode.cpp @@ -11,8 +11,8 @@ namespace DB namespace ErrorCodes { - extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ARGUMENT_OUT_OF_BOUND; + extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; } @@ -87,7 +87,7 @@ public: return col_res; } - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Illegal number of UInt arguments of function {}: should be not more than 2 dimensions", getName()); } diff --git a/src/Functions/idna.cpp b/src/Functions/idna.cpp index 5a7ae3485ba..cf9e855c912 100644 --- a/src/Functions/idna.cpp +++ b/src/Functions/idna.cpp @@ -44,15 +44,15 @@ struct IdnaEncode const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { - const size_t rows = offsets.size(); res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII - res_offsets.reserve(rows); + res_offsets.reserve(input_rows_count); size_t prev_offset = 0; std::string ascii; - for (size_t row = 0; row < rows; ++row) + for (size_t row = 0; row < input_rows_count; ++row) { const char * value = reinterpret_cast(&data[prev_offset]); const size_t value_length = offsets[row] - prev_offset - 1; @@ -85,7 +85,7 @@ struct IdnaEncode } } - [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed"); } @@ -99,15 +99,15 @@ struct IdnaDecode const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { - const size_t rows = offsets.size(); res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII - res_offsets.reserve(rows); + res_offsets.reserve(input_rows_count); size_t prev_offset = 0; std::string unicode; - for (size_t row = 0; row < rows; ++row) + for (size_t row = 0; row < input_rows_count; ++row) { const char * ascii = reinterpret_cast(&data[prev_offset]); const size_t ascii_length = offsets[row] - prev_offset - 1; @@ -124,7 +124,7 @@ struct IdnaDecode } } - [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed"); } diff --git a/src/Functions/indexHint.h b/src/Functions/indexHint.h index a71e1555c78..bc788601cd0 100644 --- a/src/Functions/indexHint.h +++ b/src/Functions/indexHint.h @@ -2,14 +2,12 @@ #include #include #include +#include namespace DB { -class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; - /** The `indexHint` function takes any number of any arguments and always returns one. * * This function has a special meaning (see ExpressionAnalyzer, KeyCondition) @@ -64,11 +62,11 @@ public: return DataTypeUInt8().createColumnConst(input_rows_count, 1u); } - void setActions(ActionsDAGPtr actions_) { actions = std::move(actions_); } - const ActionsDAGPtr & getActions() const { return actions; } + void setActions(ActionsDAG actions_) { actions = std::move(actions_); } + const ActionsDAG & getActions() const { return actions; } private: - ActionsDAGPtr actions; + ActionsDAG actions; }; } diff --git a/src/Functions/initcap.cpp b/src/Functions/initcap.cpp index 4661ce117c0..00414b22344 100644 --- a/src/Functions/initcap.cpp +++ b/src/Functions/initcap.cpp @@ -9,10 +9,12 @@ namespace struct InitcapImpl { - static void vector(const ColumnString::Chars & data, + static void vector( + const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t /*input_rows_count*/) { if (data.empty()) return; @@ -21,7 +23,7 @@ struct InitcapImpl array(data.data(), data.data() + data.size(), res_data.data()); } - static void vectorFixed(const ColumnString::Chars & data, size_t /*n*/, ColumnString::Chars & res_data) + static void vectorFixed(const ColumnString::Chars & data, size_t /*n*/, ColumnString::Chars & res_data, size_t) { res_data.resize(data.size()); array(data.data(), data.data() + data.size(), res_data.data()); diff --git a/src/Functions/initcapUTF8.cpp b/src/Functions/initcapUTF8.cpp index 076dcff6622..282d846094e 100644 --- a/src/Functions/initcapUTF8.cpp +++ b/src/Functions/initcapUTF8.cpp @@ -22,7 +22,8 @@ struct InitcapUTF8Impl const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t /*input_rows_count*/) { if (data.empty()) return; @@ -31,7 +32,7 @@ struct InitcapUTF8Impl array(data.data(), data.data() + data.size(), offsets, res_data.data()); } - [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function initcapUTF8 cannot work with FixedString argument"); } diff --git a/src/Functions/isValidUTF8.cpp b/src/Functions/isValidUTF8.cpp index d5f5e6a8986..1959502af06 100644 --- a/src/Functions/isValidUTF8.cpp +++ b/src/Functions/isValidUTF8.cpp @@ -219,42 +219,42 @@ SOFTWARE. static constexpr bool is_fixed_to_constant = false; - static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res) + static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res, size_t input_rows_count) { - size_t size = offsets.size(); size_t prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { res[i] = isValidUTF8(data.data() + prev_offset, offsets[i] - 1 - prev_offset); prev_offset = offsets[i]; } } - static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, UInt8 & /*res*/) {} - - static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res) + static void vectorFixedToConstant(const ColumnString::Chars &, size_t, UInt8 &, size_t) { - size_t size = data.size() / n; - for (size_t i = 0; i < size; ++i) + } + + static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res, size_t input_rows_count) + { + for (size_t i = 0; i < input_rows_count; ++i) res[i] = isValidUTF8(data.data() + i * n, n); } - [[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray &) + [[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function isValidUTF8 to Array argument"); } - [[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray &) + [[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function isValidUTF8 to UUID argument"); } - [[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray &) + [[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function isValidUTF8 to IPv6 argument"); } - [[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray &) + [[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function isValidUTF8 to IPv4 argument"); } diff --git a/src/Functions/keyvaluepair/extractKeyValuePairs.cpp b/src/Functions/keyvaluepair/extractKeyValuePairs.cpp index 1c5164e132d..cc9e57ac186 100644 --- a/src/Functions/keyvaluepair/extractKeyValuePairs.cpp +++ b/src/Functions/keyvaluepair/extractKeyValuePairs.cpp @@ -54,7 +54,7 @@ class ExtractKeyValuePairs : public IFunction return builder.build(); } - ColumnPtr extract(ColumnPtr data_column, std::shared_ptr extractor) const + ColumnPtr extract(ColumnPtr data_column, std::shared_ptr extractor, size_t input_rows_count) const { auto offsets = ColumnUInt64::create(); @@ -63,7 +63,7 @@ class ExtractKeyValuePairs : public IFunction uint64_t offset = 0u; - for (auto i = 0u; i < data_column->size(); i++) + for (auto i = 0u; i < input_rows_count; i++) { auto row = data_column->getDataAt(i).toView(); @@ -97,13 +97,13 @@ public: return std::make_shared(context); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { auto parsed_arguments = ArgumentExtractor::extract(arguments); auto extractor = getExtractor(parsed_arguments); - return extract(parsed_arguments.data_column, extractor); + return extract(parsed_arguments.data_column, extractor, input_rows_count); } DataTypePtr getReturnTypeImpl(const DataTypes &) const override diff --git a/src/Functions/lengthUTF8.cpp b/src/Functions/lengthUTF8.cpp index 59a0d532602..97a42816674 100644 --- a/src/Functions/lengthUTF8.cpp +++ b/src/Functions/lengthUTF8.cpp @@ -23,48 +23,42 @@ struct LengthUTF8Impl { static constexpr auto is_fixed_to_constant = false; - static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res) + static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res, size_t input_rows_count) { - size_t size = offsets.size(); - ColumnString::Offset prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { res[i] = UTF8::countCodePoints(&data[prev_offset], offsets[i] - prev_offset - 1); prev_offset = offsets[i]; } } - static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, UInt64 & /*res*/) + static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, UInt64 & /*res*/, size_t) { } - static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res) + static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res, size_t input_rows_count) { - size_t size = data.size() / n; - - for (size_t i = 0; i < size; ++i) - { + for (size_t i = 0; i < input_rows_count; ++i) res[i] = UTF8::countCodePoints(&data[i * n], n); - } } - [[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray &) + [[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function lengthUTF8 to Array argument"); } - [[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray &) + [[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function lengthUTF8 to UUID argument"); } - [[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray &) + [[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function lengthUTF8 to IPv6 argument"); } - [[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray &) + [[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function lengthUTF8 to IPv4 argument"); } diff --git a/src/Functions/mortonEncode.cpp b/src/Functions/mortonEncode.cpp index 0c19c7c3134..8cf0f18dbfe 100644 --- a/src/Functions/mortonEncode.cpp +++ b/src/Functions/mortonEncode.cpp @@ -16,8 +16,8 @@ namespace DB namespace ErrorCodes { - extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ARGUMENT_OUT_OF_BOUND; + extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; } #define EXTRACT_VECTOR(INDEX) \ @@ -130,7 +130,7 @@ namespace ErrorCodes MASK(8, 6, col6->getUInt(i)), \ MASK(8, 7, col7->getUInt(i))) \ \ - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, \ + throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, \ "Illegal number of UInt arguments of function {}, max: 8", \ getName()); \ diff --git a/src/Functions/normalizeQuery.cpp b/src/Functions/normalizeQuery.cpp index ad9a8903733..1a78bce7d29 100644 --- a/src/Functions/normalizeQuery.cpp +++ b/src/Functions/normalizeQuery.cpp @@ -19,17 +19,19 @@ template struct Impl { static constexpr auto name = keep_names ? "normalizeQueryKeepNames" : "normalizeQuery"; - static void vector(const ColumnString::Chars & data, + + static void vector( + const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { - size_t size = offsets.size(); - res_offsets.resize(size); + res_offsets.resize(input_rows_count); res_data.reserve(data.size()); ColumnString::Offset prev_src_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { ColumnString::Offset curr_src_offset = offsets[i]; @@ -43,7 +45,7 @@ struct Impl } } - [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot apply function normalizeQuery to fixed string."); } diff --git a/src/Functions/normalizeString.cpp b/src/Functions/normalizeString.cpp index 92411490eaa..c56508ced0e 100644 --- a/src/Functions/normalizeString.cpp +++ b/src/Functions/normalizeString.cpp @@ -84,7 +84,8 @@ struct NormalizeUTF8Impl static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { UErrorCode err = U_ZERO_ERROR; @@ -92,8 +93,7 @@ struct NormalizeUTF8Impl if (U_FAILURE(err)) throw Exception(ErrorCodes::CANNOT_NORMALIZE_STRING, "Normalization failed (getNormalizer): {}", u_errorName(err)); - size_t size = offsets.size(); - res_offsets.resize(size); + res_offsets.resize(input_rows_count); res_data.reserve(data.size() * 2); @@ -103,7 +103,7 @@ struct NormalizeUTF8Impl PODArray from_uchars; PODArray to_uchars; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t from_size = offsets[i] - current_from_offset - 1; @@ -157,7 +157,7 @@ struct NormalizeUTF8Impl res_data.resize(current_to_offset); } - [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot apply function normalizeUTF8 to fixed string."); } diff --git a/src/Functions/normalizedQueryHash.cpp b/src/Functions/normalizedQueryHash.cpp index 63218f28af5..3dbe3ff9847 100644 --- a/src/Functions/normalizedQueryHash.cpp +++ b/src/Functions/normalizedQueryHash.cpp @@ -27,13 +27,13 @@ struct Impl static void vector( const ColumnString::Chars & data, const ColumnString::Offsets & offsets, - PaddedPODArray & res_data) + PaddedPODArray & res_data, + size_t input_rows_count) { - size_t size = offsets.size(); - res_data.resize(size); + res_data.resize(input_rows_count); ColumnString::Offset prev_src_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { ColumnString::Offset curr_src_offset = offsets[i]; res_data[i] = normalizedQueryHash( @@ -77,15 +77,15 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnPtr column = arguments[0].column; if (const ColumnString * col = checkAndGetColumn(column.get())) { auto col_res = ColumnUInt64::create(); typename ColumnUInt64::Container & vec_res = col_res->getData(); - vec_res.resize(col->size()); - Impl::vector(col->getChars(), col->getOffsets(), vec_res); + vec_res.resize(input_rows_count); + Impl::vector(col->getChars(), col->getOffsets(), vec_res, input_rows_count); return col_res; } else diff --git a/src/Functions/pointInEllipses.cpp b/src/Functions/pointInEllipses.cpp index 2147428cee3..ac7a8cc4204 100644 --- a/src/Functions/pointInEllipses.cpp +++ b/src/Functions/pointInEllipses.cpp @@ -91,8 +91,6 @@ private: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto size = input_rows_count; - /// Prepare array of ellipses. size_t ellipses_count = (arguments.size() - 2) / 4; std::vector ellipses(ellipses_count); @@ -141,13 +139,11 @@ private: auto dst = ColumnVector::create(); auto & dst_data = dst->getData(); - dst_data.resize(size); + dst_data.resize(input_rows_count); size_t start_index = 0; - for (const auto row : collections::range(0, size)) - { + for (size_t row = 0; row < input_rows_count; ++row) dst_data[row] = isPointInEllipses(col_vec_x->getData()[row], col_vec_y->getData()[row], ellipses.data(), ellipses_count, start_index); - } return dst; } @@ -157,7 +153,7 @@ private: const auto * col_const_y = assert_cast (col_y); size_t start_index = 0; UInt8 res = isPointInEllipses(col_const_x->getValue(), col_const_y->getValue(), ellipses.data(), ellipses_count, start_index); - return DataTypeUInt8().createColumnConst(size, res); + return DataTypeUInt8().createColumnConst(input_rows_count, res); } else { diff --git a/src/Functions/punycode.cpp b/src/Functions/punycode.cpp index 8004e3731b5..ec1fcfd0a70 100644 --- a/src/Functions/punycode.cpp +++ b/src/Functions/punycode.cpp @@ -6,11 +6,11 @@ #include #include -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wnewline-eof" -# include -# include -# pragma clang diagnostic pop +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wnewline-eof" +#include +#include +#pragma clang diagnostic pop namespace DB { @@ -38,16 +38,16 @@ struct PunycodeEncode const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { - const size_t rows = offsets.size(); res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII - res_offsets.reserve(rows); + res_offsets.reserve(input_rows_count); size_t prev_offset = 0; std::u32string value_utf32; std::string value_puny; - for (size_t row = 0; row < rows; ++row) + for (size_t row = 0; row < input_rows_count; ++row) { const char * value = reinterpret_cast(&data[prev_offset]); const size_t value_length = offsets[row] - prev_offset - 1; @@ -72,7 +72,7 @@ struct PunycodeEncode } } - [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed"); } @@ -86,16 +86,16 @@ struct PunycodeDecode const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { - const size_t rows = offsets.size(); res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII - res_offsets.reserve(rows); + res_offsets.reserve(input_rows_count); size_t prev_offset = 0; std::u32string value_utf32; std::string value_utf8; - for (size_t row = 0; row < rows; ++row) + for (size_t row = 0; row < input_rows_count; ++row) { const char * value = reinterpret_cast(&data[prev_offset]); const size_t value_length = offsets[row] - prev_offset - 1; @@ -129,7 +129,7 @@ struct PunycodeDecode } } - [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed"); } diff --git a/src/Functions/randDistribution.cpp b/src/Functions/randDistribution.cpp index 4e616ada697..dc2ddf2ef24 100644 --- a/src/Functions/randDistribution.cpp +++ b/src/Functions/randDistribution.cpp @@ -24,6 +24,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } namespace @@ -92,6 +93,9 @@ struct ChiSquaredDistribution static void generate(Float64 degree_of_freedom, ColumnFloat64::Container & container) { + if (degree_of_freedom <= 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument (degrees of freedom) of function {} should be greater than zero", getName()); + auto distribution = std::chi_squared_distribution<>(degree_of_freedom); for (auto & elem : container) elem = distribution(thread_local_rng); @@ -106,6 +110,9 @@ struct StudentTDistribution static void generate(Float64 degree_of_freedom, ColumnFloat64::Container & container) { + if (degree_of_freedom <= 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument (degrees of freedom) of function {} should be greater than zero", getName()); + auto distribution = std::student_t_distribution<>(degree_of_freedom); for (auto & elem : container) elem = distribution(thread_local_rng); @@ -120,6 +127,9 @@ struct FisherFDistribution static void generate(Float64 d1, Float64 d2, ColumnFloat64::Container & container) { + if (d1 <= 0 || d2 <= 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument (degrees of freedom) of function {} should be greater than zero", getName()); + auto distribution = std::fisher_f_distribution<>(d1, d2); for (auto & elem : container) elem = distribution(thread_local_rng); @@ -246,7 +256,7 @@ public: { auto desired = Distribution::getNumberOfArguments(); if (arguments.size() != desired && arguments.size() != desired + 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Wrong number of arguments for function {}. Should be {} or {}", getName(), desired, desired + 1); @@ -299,7 +309,7 @@ public: } else { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "More than two argument specified for function {}", getName()); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "More than two arguments specified for function {}", getName()); } return res_column; diff --git a/src/Functions/reinterpretAs.cpp b/src/Functions/reinterpretAs.cpp index 5293b688678..81c9d20ce82 100644 --- a/src/Functions/reinterpretAs.cpp +++ b/src/Functions/reinterpretAs.cpp @@ -114,7 +114,7 @@ public: return to_type; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { auto from_type = arguments[0].type; @@ -136,9 +136,9 @@ public: ColumnFixedString * dst_concrete = assert_cast(dst.get()); if (src.isFixedAndContiguous() && src.sizeOfValueIfFixed() == dst_concrete->getN()) - executeContiguousToFixedString(src, *dst_concrete, dst_concrete->getN()); + executeContiguousToFixedString(src, *dst_concrete, dst_concrete->getN(), input_rows_count); else - executeToFixedString(src, *dst_concrete, dst_concrete->getN()); + executeToFixedString(src, *dst_concrete, dst_concrete->getN(), input_rows_count); result = std::move(dst); @@ -156,7 +156,7 @@ public: MutableColumnPtr dst = result_type->createColumn(); ColumnString * dst_concrete = assert_cast(dst.get()); - executeToString(src, *dst_concrete); + executeToString(src, *dst_concrete, input_rows_count); result = std::move(dst); @@ -174,12 +174,11 @@ public: const auto & data_from = col_from->getChars(); const auto & offsets_from = col_from->getOffsets(); - size_t size = offsets_from.size(); auto & vec_res = col_res->getData(); - vec_res.resize_fill(size); + vec_res.resize_fill(input_rows_count); size_t offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t copy_size = std::min(static_cast(sizeof(ToFieldType)), offsets_from[i] - offset - 1); if constexpr (std::endian::native == std::endian::little) @@ -209,7 +208,6 @@ public: const auto& data_from = col_from_fixed->getChars(); size_t step = col_from_fixed->getN(); - size_t size = data_from.size() / step; auto & vec_res = col_res->getData(); size_t offset = 0; @@ -217,11 +215,11 @@ public: size_t index = data_from.size() - copy_size; if (sizeof(ToFieldType) <= step) - vec_res.resize(size); + vec_res.resize(input_rows_count); else - vec_res.resize_fill(size); + vec_res.resize_fill(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { if constexpr (std::endian::native == std::endian::little) memcpy(&vec_res[i], &data_from[offset], copy_size); @@ -251,12 +249,11 @@ public: auto & from = column_from->getData(); auto & to = column_to->getData(); - size_t size = from.size(); - to.resize_fill(size); + to.resize_fill(input_rows_count); static constexpr size_t copy_size = std::min(sizeof(From), sizeof(To)); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { if constexpr (std::endian::native == std::endian::little) memcpy(static_cast(&to[i]), static_cast(&from[i]), copy_size); @@ -307,14 +304,13 @@ private: type.isDecimal(); } - static void NO_INLINE executeToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n) + static void NO_INLINE executeToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n, size_t input_rows_count) { - size_t rows = src.size(); ColumnFixedString::Chars & data_to = dst.getChars(); - data_to.resize_fill(n * rows); + data_to.resize_fill(n * input_rows_count); ColumnFixedString::Offset offset = 0; - for (size_t i = 0; i < rows; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { std::string_view data = src.getDataAt(i).toView(); @@ -327,11 +323,10 @@ private: } } - static void NO_INLINE executeContiguousToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n) + static void NO_INLINE executeContiguousToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n, size_t input_rows_count) { - size_t rows = src.size(); ColumnFixedString::Chars & data_to = dst.getChars(); - data_to.resize(n * rows); + data_to.resize(n * input_rows_count); #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ memcpy(data_to.data(), src.getRawData().data(), data_to.size()); @@ -340,15 +335,14 @@ private: #endif } - static void NO_INLINE executeToString(const IColumn & src, ColumnString & dst) + static void NO_INLINE executeToString(const IColumn & src, ColumnString & dst, size_t input_rows_count) { - size_t rows = src.size(); ColumnString::Chars & data_to = dst.getChars(); ColumnString::Offsets & offsets_to = dst.getOffsets(); - offsets_to.resize(rows); + offsets_to.resize(input_rows_count); ColumnString::Offset offset = 0; - for (size_t i = 0; i < rows; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { StringRef data = src.getDataAt(i); diff --git a/src/Functions/reverse.cpp b/src/Functions/reverse.cpp index d23e48b8d42..94b6634ffbd 100644 --- a/src/Functions/reverse.cpp +++ b/src/Functions/reverse.cpp @@ -55,19 +55,19 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnPtr column = arguments[0].column; if (const ColumnString * col = checkAndGetColumn(column.get())) { auto col_res = ColumnString::create(); - ReverseImpl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets()); + ReverseImpl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), input_rows_count); return col_res; } else if (const ColumnFixedString * col_fixed = checkAndGetColumn(column.get())) { auto col_res = ColumnFixedString::create(col_fixed->getN()); - ReverseImpl::vectorFixed(col_fixed->getChars(), col_fixed->getN(), col_res->getChars()); + ReverseImpl::vectorFixed(col_fixed->getChars(), col_fixed->getN(), col_res->getChars(), input_rows_count); return col_res; } else diff --git a/src/Functions/reverse.h b/src/Functions/reverse.h index 5f999af4297..7c18d72769a 100644 --- a/src/Functions/reverse.h +++ b/src/Functions/reverse.h @@ -9,17 +9,18 @@ namespace DB */ struct ReverseImpl { - static void vector(const ColumnString::Chars & data, + static void vector( + const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { res_data.resize_exact(data.size()); res_offsets.assign(offsets); - size_t size = offsets.size(); ColumnString::Offset prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { for (size_t j = prev_offset; j < offsets[i] - 1; ++j) res_data[j] = data[offsets[i] + prev_offset - 2 - j]; @@ -28,12 +29,15 @@ struct ReverseImpl } } - static void vectorFixed(const ColumnString::Chars & data, size_t n, ColumnString::Chars & res_data) + static void vectorFixed( + const ColumnString::Chars & data, + size_t n, + ColumnString::Chars & res_data, + size_t input_rows_count) { res_data.resize_exact(data.size()); - size_t size = data.size() / n; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) for (size_t j = i * n; j < (i + 1) * n; ++j) res_data[j] = data[(i * 2 + 1) * n - j - 1]; } diff --git a/src/Functions/reverseUTF8.cpp b/src/Functions/reverseUTF8.cpp index 1aee349fa8d..ca57d946b19 100644 --- a/src/Functions/reverseUTF8.cpp +++ b/src/Functions/reverseUTF8.cpp @@ -23,25 +23,25 @@ namespace */ struct ReverseUTF8Impl { - static void vector(const ColumnString::Chars & data, + static void vector( + const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { bool all_ascii = isAllASCII(data.data(), data.size()); if (all_ascii) { - ReverseImpl::vector(data, offsets, res_data, res_offsets); + ReverseImpl::vector(data, offsets, res_data, res_offsets, input_rows_count); return; } res_data.resize(data.size()); res_offsets.assign(offsets); - size_t size = offsets.size(); - ColumnString::Offset prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { ColumnString::Offset j = prev_offset; while (j < offsets[i] - 1) @@ -73,7 +73,7 @@ struct ReverseUTF8Impl } } - [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot apply function reverseUTF8 to fixed string."); } diff --git a/src/Functions/seriesDecomposeSTL.cpp b/src/Functions/seriesDecomposeSTL.cpp index 720aa1e0799..1e1c41cafad 100644 --- a/src/Functions/seriesDecomposeSTL.cpp +++ b/src/Functions/seriesDecomposeSTL.cpp @@ -50,7 +50,7 @@ public: return std::make_shared(std::make_shared(std::make_shared())); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { ColumnPtr array_ptr = arguments[0].column; const ColumnArray * array = checkAndGetColumn(array_ptr.get()); @@ -79,7 +79,7 @@ public: ColumnArray::Offset prev_src_offset = 0; - for (size_t i = 0; i < src_offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { UInt64 period; auto period_ptr = arguments[1].column->convertToFullColumnIfConst(); diff --git a/src/Functions/soundex.cpp b/src/Functions/soundex.cpp index fcf1523d1a3..e8bd1b664e3 100644 --- a/src/Functions/soundex.cpp +++ b/src/Functions/soundex.cpp @@ -79,14 +79,14 @@ struct SoundexImpl const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { - const size_t size = offsets.size(); - res_data.resize(size * (length + 1)); - res_offsets.resize(size); + res_data.resize(input_rows_count * (length + 1)); + res_offsets.resize(input_rows_count); size_t prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const char * value = reinterpret_cast(&data[prev_offset]); const size_t value_length = offsets[i] - prev_offset - 1; @@ -98,7 +98,7 @@ struct SoundexImpl } } - [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by soundex function"); } diff --git a/src/Functions/space.cpp b/src/Functions/space.cpp index cd6ca73c088..cf1634e0319 100644 --- a/src/Functions/space.cpp +++ b/src/Functions/space.cpp @@ -55,7 +55,7 @@ public: template - bool executeConstant(ColumnPtr col_times, ColumnString::Offsets & res_offsets, ColumnString::Chars & res_chars) const + bool executeConstant(ColumnPtr col_times, ColumnString::Offsets & res_offsets, ColumnString::Chars & res_chars, size_t input_rows_count) const { const ColumnConst & col_times_const = checkAndGetColumn(*col_times); @@ -71,12 +71,12 @@ public: checkRepeatTime(times); - res_offsets.resize(col_times->size()); - res_chars.resize(col_times->size() * (times + 1)); + res_offsets.resize(input_rows_count); + res_chars.resize(input_rows_count * (times + 1)); size_t pos = 0; - for (size_t i = 0; i < col_times->size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { memset(res_chars.begin() + pos, space, times); pos += times; @@ -92,20 +92,20 @@ public: template - bool executeVector(ColumnPtr col_times_, ColumnString::Offsets & res_offsets, ColumnString::Chars & res_chars) const + bool executeVector(ColumnPtr col_times_, ColumnString::Offsets & res_offsets, ColumnString::Chars & res_chars, size_t input_rows_count) const { auto * col_times = checkAndGetColumn(col_times_.get()); if (!col_times) return false; - res_offsets.resize(col_times->size()); - res_chars.resize(col_times->size() * 10); /// heuristic + res_offsets.resize(input_rows_count); + res_chars.resize(input_rows_count * 10); /// heuristic const PaddedPODArray & times_data = col_times->getData(); size_t pos = 0; - for (size_t i = 0; i < col_times->size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { typename DataType::FieldType times = times_data[i]; @@ -132,7 +132,7 @@ public: } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const auto & col_num = arguments[0].column; @@ -143,26 +143,26 @@ public: if (const ColumnConst * col_num_const = checkAndGetColumn(col_num.get())) { - if ((executeConstant(col_num, res_offsets, res_chars)) - || (executeConstant(col_num, res_offsets, res_chars)) - || (executeConstant(col_num, res_offsets, res_chars)) - || (executeConstant(col_num, res_offsets, res_chars)) - || (executeConstant(col_num, res_offsets, res_chars)) - || (executeConstant(col_num, res_offsets, res_chars)) - || (executeConstant(col_num, res_offsets, res_chars)) - || (executeConstant(col_num, res_offsets, res_chars))) + if ((executeConstant(col_num, res_offsets, res_chars, input_rows_count)) + || (executeConstant(col_num, res_offsets, res_chars, input_rows_count)) + || (executeConstant(col_num, res_offsets, res_chars, input_rows_count)) + || (executeConstant(col_num, res_offsets, res_chars, input_rows_count)) + || (executeConstant(col_num, res_offsets, res_chars, input_rows_count)) + || (executeConstant(col_num, res_offsets, res_chars, input_rows_count)) + || (executeConstant(col_num, res_offsets, res_chars, input_rows_count)) + || (executeConstant(col_num, res_offsets, res_chars, input_rows_count))) return col_res; } else { - if ((executeVector(col_num, res_offsets, res_chars)) - || (executeVector(col_num, res_offsets, res_chars)) - || (executeVector(col_num, res_offsets, res_chars)) - || (executeVector(col_num, res_offsets, res_chars)) - || (executeVector(col_num, res_offsets, res_chars)) - || (executeVector(col_num, res_offsets, res_chars)) - || (executeVector(col_num, res_offsets, res_chars)) - || (executeVector(col_num, res_offsets, res_chars))) + if ((executeVector(col_num, res_offsets, res_chars, input_rows_count)) + || (executeVector(col_num, res_offsets, res_chars, input_rows_count)) + || (executeVector(col_num, res_offsets, res_chars, input_rows_count)) + || (executeVector(col_num, res_offsets, res_chars, input_rows_count)) + || (executeVector(col_num, res_offsets, res_chars, input_rows_count)) + || (executeVector(col_num, res_offsets, res_chars, input_rows_count)) + || (executeVector(col_num, res_offsets, res_chars, input_rows_count)) + || (executeVector(col_num, res_offsets, res_chars, input_rows_count))) return col_res; } diff --git a/src/Functions/stem.cpp b/src/Functions/stem.cpp index 5b845cf332b..b3be40f4022 100644 --- a/src/Functions/stem.cpp +++ b/src/Functions/stem.cpp @@ -32,7 +32,8 @@ struct StemImpl const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets, - const String & language) + const String & language, + size_t input_rows_count) { sb_stemmer * stemmer = sb_stemmer_new(language.data(), "UTF_8"); @@ -45,7 +46,7 @@ struct StemImpl res_offsets.assign(offsets); UInt64 data_size = 0; - for (UInt64 i = 0; i < offsets.size(); ++i) + for (UInt64 i = 0; i < input_rows_count; ++i) { /// Note that accessing -1th element is valid for PaddedPODArray. size_t original_size = offsets[i] - offsets[i - 1]; @@ -101,7 +102,7 @@ public: ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const auto & langcolumn = arguments[0].column; const auto & strcolumn = arguments[1].column; @@ -119,7 +120,7 @@ public: String language = lang_col->getValue(); auto col_res = ColumnString::create(); - StemImpl::vector(words_col->getChars(), words_col->getOffsets(), col_res->getChars(), col_res->getOffsets(), language); + StemImpl::vector(words_col->getChars(), words_col->getOffsets(), col_res->getChars(), col_res->getOffsets(), language, input_rows_count); return col_res; } }; diff --git a/src/Functions/stringCutToZero.cpp b/src/Functions/stringCutToZero.cpp index b9f742cd8bc..16e57d741fa 100644 --- a/src/Functions/stringCutToZero.cpp +++ b/src/Functions/stringCutToZero.cpp @@ -40,7 +40,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - static bool tryExecuteString(const IColumn * col, ColumnPtr & col_res) + static bool tryExecuteString(const IColumn * col, ColumnPtr & col_res, size_t input_rows_count) { const ColumnString * col_str_in = checkAndGetColumn(col); @@ -53,8 +53,7 @@ public: const ColumnString::Chars & in_vec = col_str_in->getChars(); const ColumnString::Offsets & in_offsets = col_str_in->getOffsets(); - size_t size = in_offsets.size(); - out_offsets.resize(size); + out_offsets.resize(input_rows_count); out_vec.resize(in_vec.size()); char * begin = reinterpret_cast(out_vec.data()); @@ -62,7 +61,7 @@ public: ColumnString::Offset current_in_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const char * pos_in = reinterpret_cast(&in_vec[current_in_offset]); size_t current_size = strlen(pos_in); @@ -87,7 +86,7 @@ public: } } - static bool tryExecuteFixedString(const IColumn * col, ColumnPtr & col_res) + static bool tryExecuteFixedString(const IColumn * col, ColumnPtr & col_res, size_t input_rows_count) { const ColumnFixedString * col_fstr_in = checkAndGetColumn(col); @@ -99,10 +98,8 @@ public: const ColumnString::Chars & in_vec = col_fstr_in->getChars(); - size_t size = col_fstr_in->size(); - - out_offsets.resize(size); - out_vec.resize(in_vec.size() + size); + out_offsets.resize(input_rows_count); + out_vec.resize(in_vec.size() + input_rows_count); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; @@ -110,7 +107,7 @@ public: size_t n = col_fstr_in->getN(); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t current_size = strnlen(pos_in, n); memcpySmallAllowReadWriteOverflow15(pos, pos_in, current_size); @@ -133,12 +130,12 @@ public: } } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IColumn * column = arguments[0].column.get(); ColumnPtr res_column; - if (tryExecuteFixedString(column, res_column) || tryExecuteString(column, res_column)) + if (tryExecuteFixedString(column, res_column, input_rows_count) || tryExecuteString(column, res_column, input_rows_count)) return res_column; throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp index eccd849059b..dc12ae193ff 100644 --- a/src/Functions/substringIndex.cpp +++ b/src/Functions/substringIndex.cpp @@ -68,7 +68,7 @@ namespace return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { ColumnPtr column_string = arguments[0].column; ColumnPtr column_delim = arguments[1].column; @@ -110,10 +110,10 @@ namespace if (is_count_const) { Int64 count = column_count->getInt(0); - vectorConstant(col_str, delim, count, vec_res, offsets_res); + vectorConstant(col_str, delim, count, vec_res, offsets_res, input_rows_count); } else - vectorVector(col_str, delim, column_count.get(), vec_res, offsets_res); + vectorVector(col_str, delim, column_count.get(), vec_res, offsets_res, input_rows_count); } return column_res; } @@ -124,18 +124,18 @@ namespace const String & delim, const IColumn * count_column, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { - size_t rows = str_column->size(); res_data.reserve(str_column->getChars().size() / 2); - res_offsets.reserve(rows); + res_offsets.reserve(input_rows_count); bool all_ascii = isAllASCII(str_column->getChars().data(), str_column->getChars().size()) && isAllASCII(reinterpret_cast(delim.data()), delim.size()); std::unique_ptr searcher = !is_utf8 || all_ascii ? nullptr : std::make_unique(delim.data(), delim.size()); - for (size_t i = 0; i < rows; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { StringRef str_ref = str_column->getDataAt(i); Int64 count = count_column->getInt(i); @@ -157,18 +157,18 @@ namespace const String & delim, Int64 count, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { - size_t rows = str_column->size(); res_data.reserve(str_column->getChars().size() / 2); - res_offsets.reserve(rows); + res_offsets.reserve(input_rows_count); bool all_ascii = isAllASCII(str_column->getChars().data(), str_column->getChars().size()) && isAllASCII(reinterpret_cast(delim.data()), delim.size()); std::unique_ptr searcher = !is_utf8 || all_ascii ? nullptr : std::make_unique(delim.data(), delim.size()); - for (size_t i = 0; i < rows; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { StringRef str_ref = str_column->getDataAt(i); diff --git a/src/Functions/subtractNanoseconds.cpp b/src/Functions/subtractNanoseconds.cpp index fffb4eae37a..360c5ecd9cb 100644 --- a/src/Functions/subtractNanoseconds.cpp +++ b/src/Functions/subtractNanoseconds.cpp @@ -6,6 +6,7 @@ namespace DB { using FunctionSubtractNanoseconds = FunctionDateOrDateTimeAddInterval; + REGISTER_FUNCTION(SubtractNanoseconds) { factory.registerFunction(); diff --git a/src/Functions/throwIf.cpp b/src/Functions/throwIf.cpp index becc6d2f772..e317c65c622 100644 --- a/src/Functions/throwIf.cpp +++ b/src/Functions/throwIf.cpp @@ -152,7 +152,7 @@ private: return nullptr; } - bool allow_custom_error_code_argument; + const bool allow_custom_error_code_argument; }; } diff --git a/src/Functions/timeSlots.cpp b/src/Functions/timeSlots.cpp index 040495ab023..b62bb20c64e 100644 --- a/src/Functions/timeSlots.cpp +++ b/src/Functions/timeSlots.cpp @@ -41,18 +41,17 @@ struct TimeSlotsImpl /// The following three methods process DateTime type static void vectorVector( const PaddedPODArray & starts, const PaddedPODArray & durations, UInt32 time_slot_size, - PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets) + PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets, + size_t input_rows_count) { if (time_slot_size == 0) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Time slot size cannot be zero"); - size_t size = starts.size(); - - result_offsets.resize(size); - result_values.reserve(size); + result_offsets.resize(input_rows_count); + result_values.reserve(input_rows_count); ColumnArray::Offset current_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { for (UInt32 value = starts[i] / time_slot_size, end = (starts[i] + durations[i]) / time_slot_size; value <= end; ++value) { @@ -66,18 +65,17 @@ struct TimeSlotsImpl static void vectorConstant( const PaddedPODArray & starts, UInt32 duration, UInt32 time_slot_size, - PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets) + PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets, + size_t input_rows_count) { if (time_slot_size == 0) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Time slot size cannot be zero"); - size_t size = starts.size(); - - result_offsets.resize(size); - result_values.reserve(size); + result_offsets.resize(input_rows_count); + result_values.reserve(input_rows_count); ColumnArray::Offset current_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { for (UInt32 value = starts[i] / time_slot_size, end = (starts[i] + duration) / time_slot_size; value <= end; ++value) { @@ -91,18 +89,17 @@ struct TimeSlotsImpl static void constantVector( UInt32 start, const PaddedPODArray & durations, UInt32 time_slot_size, - PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets) + PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets, + size_t input_rows_count) { if (time_slot_size == 0) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Time slot size cannot be zero"); - size_t size = durations.size(); - - result_offsets.resize(size); - result_values.reserve(size); + result_offsets.resize(input_rows_count); + result_values.reserve(input_rows_count); ColumnArray::Offset current_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { for (UInt32 value = start / time_slot_size, end = (start + durations[i]) / time_slot_size; value <= end; ++value) { @@ -120,12 +117,11 @@ struct TimeSlotsImpl */ static NO_SANITIZE_UNDEFINED void vectorVector( const PaddedPODArray & starts, const PaddedPODArray & durations, Decimal64 time_slot_size, - PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets, UInt16 dt_scale, UInt16 duration_scale, UInt16 time_slot_scale) + PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets, UInt16 dt_scale, UInt16 duration_scale, UInt16 time_slot_scale, + size_t input_rows_count) { - size_t size = starts.size(); - - result_offsets.resize(size); - result_values.reserve(size); + result_offsets.resize(input_rows_count); + result_values.reserve(input_rows_count); /// Modify all units to have same scale UInt16 max_scale = std::max({dt_scale, duration_scale, time_slot_scale}); @@ -139,7 +135,7 @@ struct TimeSlotsImpl if (time_slot_size == 0) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Time slot size cannot be zero"); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { for (DateTime64 value = (starts[i] * dt_multiplier) / time_slot_size, end = (starts[i] * dt_multiplier + durations[i] * dur_multiplier) / time_slot_size; value <= end; value += 1) { @@ -152,12 +148,11 @@ struct TimeSlotsImpl static NO_SANITIZE_UNDEFINED void vectorConstant( const PaddedPODArray & starts, Decimal64 duration, Decimal64 time_slot_size, - PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets, UInt16 dt_scale, UInt16 duration_scale, UInt16 time_slot_scale) + PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets, UInt16 dt_scale, UInt16 duration_scale, UInt16 time_slot_scale, + size_t input_rows_count) { - size_t size = starts.size(); - - result_offsets.resize(size); - result_values.reserve(size); + result_offsets.resize(input_rows_count); + result_values.reserve(input_rows_count); /// Modify all units to have same scale UInt16 max_scale = std::max({dt_scale, duration_scale, time_slot_scale}); @@ -172,7 +167,7 @@ struct TimeSlotsImpl if (time_slot_size == 0) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Time slot size cannot be zero"); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { for (DateTime64 value = (starts[i] * dt_multiplier) / time_slot_size, end = (starts[i] * dt_multiplier + duration) / time_slot_size; value <= end; value += 1) { @@ -185,12 +180,11 @@ struct TimeSlotsImpl static NO_SANITIZE_UNDEFINED void constantVector( DateTime64 start, const PaddedPODArray & durations, Decimal64 time_slot_size, - PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets, UInt16 dt_scale, UInt16 duration_scale, UInt16 time_slot_scale) + PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets, UInt16 dt_scale, UInt16 duration_scale, UInt16 time_slot_scale, + size_t input_rows_count) { - size_t size = durations.size(); - - result_offsets.resize(size); - result_values.reserve(size); + result_offsets.resize(input_rows_count); + result_values.reserve(input_rows_count); /// Modify all units to have same scale UInt16 max_scale = std::max({dt_scale, duration_scale, time_slot_scale}); @@ -205,7 +199,7 @@ struct TimeSlotsImpl if (time_slot_size == 0) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Time slot size cannot be zero"); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { for (DateTime64 value = start / time_slot_size, end = (start + durations[i] * dur_multiplier) / time_slot_size; value <= end; value += 1) { @@ -282,7 +276,7 @@ public: } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { if (WhichDataType(arguments[0].type).isDateTime()) { @@ -308,17 +302,17 @@ public: if (dt_starts && durations) { - TimeSlotsImpl::vectorVector(dt_starts->getData(), durations->getData(), time_slot_size, res_values, res->getOffsets()); + TimeSlotsImpl::vectorVector(dt_starts->getData(), durations->getData(), time_slot_size, res_values, res->getOffsets(), input_rows_count); return res; } else if (dt_starts && const_durations) { - TimeSlotsImpl::vectorConstant(dt_starts->getData(), const_durations->getValue(), time_slot_size, res_values, res->getOffsets()); + TimeSlotsImpl::vectorConstant(dt_starts->getData(), const_durations->getValue(), time_slot_size, res_values, res->getOffsets(), input_rows_count); return res; } else if (dt_const_starts && durations) { - TimeSlotsImpl::constantVector(dt_const_starts->getValue(), durations->getData(), time_slot_size, res_values, res->getOffsets()); + TimeSlotsImpl::constantVector(dt_const_starts->getValue(), durations->getData(), time_slot_size, res_values, res->getOffsets(), input_rows_count); return res; } } @@ -353,21 +347,21 @@ public: if (starts && durations) { TimeSlotsImpl::vectorVector(starts->getData(), durations->getData(), time_slot_size, res_values, res->getOffsets(), - start_time_scale, duration_scale, time_slot_scale); + start_time_scale, duration_scale, time_slot_scale, input_rows_count); return res; } else if (starts && const_durations) { TimeSlotsImpl::vectorConstant( starts->getData(), const_durations->getValue(), time_slot_size, res_values, res->getOffsets(), - start_time_scale, duration_scale, time_slot_scale); + start_time_scale, duration_scale, time_slot_scale, input_rows_count); return res; } else if (const_starts && durations) { TimeSlotsImpl::constantVector( const_starts->getValue(), durations->getData(), time_slot_size, res_values, res->getOffsets(), - start_time_scale, duration_scale, time_slot_scale); + start_time_scale, duration_scale, time_slot_scale, input_rows_count); return res; } } diff --git a/src/Functions/toDecimalString.cpp b/src/Functions/toDecimalString.cpp index 523948a5396..3566ebc93ad 100644 --- a/src/Functions/toDecimalString.cpp +++ b/src/Functions/toDecimalString.cpp @@ -54,9 +54,9 @@ private: /// For operations with Integer/Float template void vectorConstant(const FromVectorType & vec_from, UInt8 precision, - ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, + size_t input_rows_count) const { - size_t input_rows_count = vec_from.size(); result_offsets.resize(input_rows_count); /// Buffer is used here and in functions below because resulting size cannot be precisely anticipated, @@ -74,9 +74,9 @@ private: template void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector::Container & vec_precision, - ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, + size_t input_rows_count) const { - size_t input_rows_count = vec_from.size(); result_offsets.resize(input_rows_count); WriteBufferFromVector buf_to(vec_to); @@ -98,7 +98,8 @@ private: /// For operations with Decimal template void vectorConstant(const FirstArgVectorType & vec_from, UInt8 precision, - ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale, + size_t input_rows_count) const { /// There are no more than 77 meaning digits (as it is the max length of UInt256). So we can limit it with 77. constexpr size_t max_digits = std::numeric_limits::digits10; @@ -107,7 +108,6 @@ private: "Too many fractional digits requested for Decimal, must not be more than {}", max_digits); WriteBufferFromVector buf_to(vec_to); - size_t input_rows_count = vec_from.size(); result_offsets.resize(input_rows_count); for (size_t i = 0; i < input_rows_count; ++i) @@ -121,9 +121,9 @@ private: template void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector::Container & vec_precision, - ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale, + size_t input_rows_count) const { - size_t input_rows_count = vec_from.size(); result_offsets.resize(input_rows_count); WriteBufferFromVector buf_to(vec_to); @@ -182,28 +182,28 @@ private: } public: - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { switch (arguments[0].type->getTypeId()) { - case TypeIndex::UInt8: return executeType(arguments); - case TypeIndex::UInt16: return executeType(arguments); - case TypeIndex::UInt32: return executeType(arguments); - case TypeIndex::UInt64: return executeType(arguments); - case TypeIndex::UInt128: return executeType(arguments); - case TypeIndex::UInt256: return executeType(arguments); - case TypeIndex::Int8: return executeType(arguments); - case TypeIndex::Int16: return executeType(arguments); - case TypeIndex::Int32: return executeType(arguments); - case TypeIndex::Int64: return executeType(arguments); - case TypeIndex::Int128: return executeType(arguments); - case TypeIndex::Int256: return executeType(arguments); - case TypeIndex::Float32: return executeType(arguments); - case TypeIndex::Float64: return executeType(arguments); - case TypeIndex::Decimal32: return executeType(arguments); - case TypeIndex::Decimal64: return executeType(arguments); - case TypeIndex::Decimal128: return executeType(arguments); - case TypeIndex::Decimal256: return executeType(arguments); + case TypeIndex::UInt8: return executeType(arguments, input_rows_count); + case TypeIndex::UInt16: return executeType(arguments, input_rows_count); + case TypeIndex::UInt32: return executeType(arguments, input_rows_count); + case TypeIndex::UInt64: return executeType(arguments, input_rows_count); + case TypeIndex::UInt128: return executeType(arguments, input_rows_count); + case TypeIndex::UInt256: return executeType(arguments, input_rows_count); + case TypeIndex::Int8: return executeType(arguments, input_rows_count); + case TypeIndex::Int16: return executeType(arguments, input_rows_count); + case TypeIndex::Int32: return executeType(arguments, input_rows_count); + case TypeIndex::Int64: return executeType(arguments, input_rows_count); + case TypeIndex::Int128: return executeType(arguments, input_rows_count); + case TypeIndex::Int256: return executeType(arguments, input_rows_count); + case TypeIndex::Float32: return executeType(arguments, input_rows_count); + case TypeIndex::Float64: return executeType(arguments, input_rows_count); + case TypeIndex::Decimal32: return executeType(arguments, input_rows_count); + case TypeIndex::Decimal64: return executeType(arguments, input_rows_count); + case TypeIndex::Decimal128: return executeType(arguments, input_rows_count); + case TypeIndex::Decimal256: return executeType(arguments, input_rows_count); default: throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", arguments[0].column->getName(), getName()); @@ -212,7 +212,7 @@ public: private: template - ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const + ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { const auto * precision_col = checkAndGetColumn>(arguments[1].column.get()); const auto * precision_col_const = checkAndGetColumnConst>(arguments[1].column.get()); @@ -230,9 +230,9 @@ private: { UInt8 from_scale = from_col->getScale(); if (precision_col_const) - vectorConstant(from_col->getData(), precision_col_const->template getValue(), result_chars, result_offsets, from_scale); + vectorConstant(from_col->getData(), precision_col_const->template getValue(), result_chars, result_offsets, from_scale, input_rows_count); else if (precision_col) - vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets, from_scale); + vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets, from_scale, input_rows_count); else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second argument of function formatDecimal", arguments[1].column->getName()); } @@ -245,9 +245,9 @@ private: if (from_col) { if (precision_col_const) - vectorConstant(from_col->getData(), precision_col_const->template getValue(), result_chars, result_offsets); + vectorConstant(from_col->getData(), precision_col_const->template getValue(), result_chars, result_offsets, input_rows_count); else if (precision_col) - vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets); + vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets, input_rows_count); else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second argument of function formatDecimal", arguments[1].column->getName()); diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 50442d1b448..21b7cf895d2 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -147,19 +147,20 @@ public: std::unreachable(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { const auto & time_column = arguments[0]; const auto & interval_column = arguments[1]; const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, 2, 0); - auto result_column = dispatchForTimeColumn(time_column, interval_column, result_type, time_zone); + auto result_column = dispatchForTimeColumn(time_column, interval_column, result_type, time_zone, input_rows_count); return result_column; } private: ColumnPtr dispatchForTimeColumn( const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, - const DataTypePtr & result_type, const DateLUTImpl & time_zone) const + const DataTypePtr & result_type, const DateLUTImpl & time_zone, + size_t input_rows_count) const { const auto & time_column_type = *time_column.type.get(); const auto & time_column_col = *time_column.column.get(); @@ -170,19 +171,19 @@ private: auto scale = assert_cast(time_column_type).getScale(); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone, scale); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone, input_rows_count, scale); } else if (isDateTime(time_column_type)) { const auto * time_column_vec = checkAndGetColumn(&time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone, input_rows_count); } else if (isDate(time_column_type)) { const auto * time_column_vec = checkAndGetColumn(&time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone, input_rows_count); } throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for 1st argument of function {}, expected a Date, DateTime or DateTime64", getName()); } @@ -190,7 +191,7 @@ private: template ColumnPtr dispatchForIntervalColumn( const TimeDataType & time_data_type, const TimeColumnType & time_column, const ColumnWithTypeAndName & interval_column, - const DataTypePtr & result_type, const DateLUTImpl & time_zone, UInt16 scale = 1) const + const DataTypePtr & result_type, const DateLUTImpl & time_zone, size_t input_rows_count, UInt16 scale = 1) const { const auto * interval_type = checkAndGetDataType(interval_column.type.get()); if (!interval_type) @@ -207,27 +208,27 @@ private: switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { case IntervalKind::Kind::Nanosecond: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, time_zone, input_rows_count, scale); case IntervalKind::Kind::Microsecond: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, time_zone, input_rows_count, scale); case IntervalKind::Kind::Millisecond: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, time_zone, input_rows_count, scale); case IntervalKind::Kind::Second: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, time_zone, input_rows_count, scale); case IntervalKind::Kind::Minute: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, time_zone, input_rows_count, scale); case IntervalKind::Kind::Hour: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, time_zone, input_rows_count, scale); case IntervalKind::Kind::Day: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, time_zone, input_rows_count, scale); case IntervalKind::Kind::Week: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, time_zone, input_rows_count, scale); case IntervalKind::Kind::Month: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, time_zone, input_rows_count, scale); case IntervalKind::Kind::Quarter: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, time_zone, input_rows_count, scale); case IntervalKind::Kind::Year: - return execute(time_data_type, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, result_type, time_zone, input_rows_count, scale); } std::unreachable(); @@ -236,22 +237,21 @@ private: template ColumnPtr execute( const TimeDataType &, const TimeColumnType & time_column_type, Int64 num_units, - const DataTypePtr & result_type, const DateLUTImpl & time_zone, UInt16 scale) const + const DataTypePtr & result_type, const DateLUTImpl & time_zone, size_t input_rows_count, UInt16 scale) const { using ResultColumnType = typename ResultDataType::ColumnType; using ResultFieldType = typename ResultDataType::FieldType; const auto & time_data = time_column_type.getData(); - size_t size = time_data.size(); auto result_col = result_type->createColumn(); auto * col_to = assert_cast(result_col.get()); auto & result_data = col_to->getData(); - result_data.resize(size); + result_data.resize(input_rows_count); Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); - for (size_t i = 0; i != size; ++i) + for (size_t i = 0; i != input_rows_count; ++i) result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_multiplier)); return result_col; diff --git a/src/Functions/toValidUTF8.cpp b/src/Functions/toValidUTF8.cpp index 41d29d9c494..376732256b0 100644 --- a/src/Functions/toValidUTF8.cpp +++ b/src/Functions/toValidUTF8.cpp @@ -128,16 +128,16 @@ struct ToValidUTF8Impl const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { - const size_t offsets_size = offsets.size(); /// It can be larger than that, but we believe it is unlikely to happen. res_data.resize(data.size()); - res_offsets.resize(offsets_size); + res_offsets.resize(input_rows_count); size_t prev_offset = 0; WriteBufferFromVector write_buffer(res_data); - for (size_t i = 0; i < offsets_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const char * haystack_data = reinterpret_cast(&data[prev_offset]); const size_t haystack_size = offsets[i] - prev_offset - 1; @@ -149,7 +149,7 @@ struct ToValidUTF8Impl write_buffer.finalize(); } - [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by toValidUTF8 function"); } diff --git a/src/Functions/tokenExtractors.cpp b/src/Functions/tokenExtractors.cpp index e7dcb5cced3..1bbf313fbae 100644 --- a/src/Functions/tokenExtractors.cpp +++ b/src/Functions/tokenExtractors.cpp @@ -73,7 +73,7 @@ public: return std::make_shared(std::make_shared()); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { auto column_offsets = ColumnArray::ColumnOffsets::create(); @@ -90,9 +90,9 @@ public: auto input_column = arguments[0].column; if (const auto * column_string = checkAndGetColumn(input_column.get())) - executeImpl(extractor, *column_string, *result_column_string, *column_offsets); + executeImpl(extractor, *column_string, *result_column_string, *column_offsets, input_rows_count); else if (const auto * column_fixed_string = checkAndGetColumn(input_column.get())) - executeImpl(extractor, *column_fixed_string, *result_column_string, *column_offsets); + executeImpl(extractor, *column_fixed_string, *result_column_string, *column_offsets, input_rows_count); return ColumnArray::create(std::move(result_column_string), std::move(column_offsets)); } @@ -105,9 +105,9 @@ public: auto input_column = arguments[0].column; if (const auto * column_string = checkAndGetColumn(input_column.get())) - executeImpl(extractor, *column_string, *result_column_string, *column_offsets); + executeImpl(extractor, *column_string, *result_column_string, *column_offsets, input_rows_count); else if (const auto * column_fixed_string = checkAndGetColumn(input_column.get())) - executeImpl(extractor, *column_fixed_string, *result_column_string, *column_offsets); + executeImpl(extractor, *column_fixed_string, *result_column_string, *column_offsets, input_rows_count); return ColumnArray::create(std::move(result_column_string), std::move(column_offsets)); } @@ -120,15 +120,15 @@ private: const ExtractorType & extractor, StringColumnType & input_data_column, ResultStringColumnType & result_data_column, - ColumnArray::ColumnOffsets & offsets_column) const + ColumnArray::ColumnOffsets & offsets_column, + size_t input_rows_count) const { size_t current_tokens_size = 0; auto & offsets_data = offsets_column.getData(); - size_t column_size = input_data_column.size(); - offsets_data.resize(column_size); + offsets_data.resize(input_rows_count); - for (size_t i = 0; i < column_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { auto data = input_data_column.getDataAt(i); diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index 0dbc9946710..0dfc9197845 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -138,8 +138,7 @@ namespace } } - ColumnPtr executeImpl( - const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { std::call_once(once, [&] { initialize(arguments, result_type); }); @@ -174,30 +173,30 @@ namespace } else if (cache.table_num_to_idx) { - if (!executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted) - && !executeNum>(in, *column_result, default_non_const, *in_casted)) + if (!executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count) + && !executeNum>(in, *column_result, default_non_const, *in_casted, input_rows_count)) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", in->getName(), getName()); } } else if (cache.table_string_to_idx) { - if (!executeString(in, *column_result, default_non_const, *in_casted)) - executeContiguous(in, *column_result, default_non_const, *in_casted); + if (!executeString(in, *column_result, default_non_const, *in_casted, input_rows_count)) + executeContiguous(in, *column_result, default_non_const, *in_casted, input_rows_count); } else if (cache.table_anything_to_idx) { - executeAnything(in, *column_result, default_non_const, *in_casted); + executeAnything(in, *column_result, default_non_const, *in_casted, input_rows_count); } else throw Exception(ErrorCodes::LOGICAL_ERROR, "State of the function `transform` is not initialized"); @@ -218,12 +217,11 @@ namespace return impl->execute(args, result_type, input_rows_count); } - void executeAnything(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const + void executeAnything(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted, size_t input_rows_count) const { - const size_t size = in->size(); const auto & table = *cache.table_anything_to_idx; - column_result.reserve(size); - for (size_t i = 0; i < size; ++i) + column_result.reserve(input_rows_count); + for (size_t i = 0; i < input_rows_count; ++i) { SipHash hash; in->updateHashWithValue(i, hash); @@ -240,12 +238,11 @@ namespace } } - void executeContiguous(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const + void executeContiguous(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted, size_t input_rows_count) const { - const size_t size = in->size(); const auto & table = *cache.table_string_to_idx; - column_result.reserve(size); - for (size_t i = 0; i < size; ++i) + column_result.reserve(input_rows_count); + for (size_t i = 0; i < input_rows_count; ++i) { const auto * it = table.find(in->getDataAt(i)); if (it) @@ -260,7 +257,7 @@ namespace } template - bool executeNum(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const + bool executeNum(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted, size_t input_rows_count) const { const auto * const in = checkAndGetColumn(in_untyped); if (!in) @@ -270,24 +267,23 @@ namespace if constexpr (std::is_same_v, T> || std::is_same_v, T>) in_scale = in->getScale(); - if (!executeNumToString(pod, column_result, default_non_const) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale) - && !executeNumToNum>(pod, column_result, default_non_const, in_scale)) + if (!executeNumToString(pod, column_result, default_non_const, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count) + && !executeNumToNum>(pod, column_result, default_non_const, in_scale, input_rows_count)) { - const size_t size = pod.size(); const auto & table = *cache.table_num_to_idx; - column_result.reserve(size); - for (size_t i = 0; i < size; ++i) + column_result.reserve(input_rows_count); + for (size_t i = 0; i < input_rows_count; ++i) { const auto * it = table.find(bit_cast(pod[i])); if (it) @@ -304,14 +300,13 @@ namespace } template - bool executeNumToString(const PaddedPODArray & pod, IColumn & column_result, const ColumnPtr default_non_const) const + bool executeNumToString(const PaddedPODArray & pod, IColumn & column_result, const ColumnPtr default_non_const, size_t input_rows_count) const { auto * out = typeid_cast(&column_result); if (!out) return false; auto & out_offs = out->getOffsets(); - const size_t size = pod.size(); - out_offs.resize(size); + out_offs.resize(input_rows_count); auto & out_chars = out->getChars(); const auto * to_col = assert_cast(cache.to_column.get()); @@ -326,14 +321,14 @@ namespace const auto & def_offs = def->getOffsets(); const auto * def_data = def_chars.data(); auto def_size = def_offs[0]; - executeNumToStringHelper(table, pod, out_chars, out_offs, to_chars, to_offs, def_data, def_size, size); + executeNumToStringHelper(table, pod, out_chars, out_offs, to_chars, to_offs, def_data, def_size, input_rows_count); } else { const auto * def = assert_cast(default_non_const.get()); const auto & def_chars = def->getChars(); const auto & def_offs = def->getOffsets(); - executeNumToStringHelper(table, pod, out_chars, out_offs, to_chars, to_offs, def_chars, def_offs, size); + executeNumToStringHelper(table, pod, out_chars, out_offs, to_chars, to_offs, def_chars, def_offs, input_rows_count); } return true; } @@ -348,10 +343,10 @@ namespace const ColumnString::Offsets & to_offsets, const DefData & def_data, const DefOffs & def_offsets, - const size_t size) const + size_t input_rows_count) const { size_t out_cur_off = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const char8_t * to = nullptr; size_t to_size = 0; @@ -383,14 +378,13 @@ namespace template bool executeNumToNum( - const PaddedPODArray & pod, IColumn & column_result, const ColumnPtr default_non_const, const UInt32 in_scale) const + const PaddedPODArray & pod, IColumn & column_result, ColumnPtr default_non_const, UInt32 in_scale, size_t input_rows_count) const { auto * out = typeid_cast(&column_result); if (!out) return false; auto & out_pod = out->getData(); - const size_t size = pod.size(); - out_pod.resize(size); + out_pod.resize(input_rows_count); UInt32 out_scale = 0; if constexpr (std::is_same_v, T> || std::is_same_v, T>) out_scale = out->getScale(); @@ -400,15 +394,15 @@ namespace if (cache.default_column) { const auto const_def = assert_cast(cache.default_column.get())->getData()[0]; - executeNumToNumHelper(table, pod, out_pod, to_pod, const_def, size, out_scale, out_scale); + executeNumToNumHelper(table, pod, out_pod, to_pod, const_def, input_rows_count, out_scale, out_scale); } else if (default_non_const) { const auto & nconst_def = assert_cast(default_non_const.get())->getData(); - executeNumToNumHelper(table, pod, out_pod, to_pod, nconst_def, size, out_scale, out_scale); + executeNumToNumHelper(table, pod, out_pod, to_pod, nconst_def, input_rows_count, out_scale, out_scale); } else - executeNumToNumHelper(table, pod, out_pod, to_pod, pod, size, out_scale, in_scale); + executeNumToNumHelper(table, pod, out_pod, to_pod, pod, input_rows_count, out_scale, in_scale); return true; } @@ -419,11 +413,11 @@ namespace PaddedPODArray & out_pod, const PaddedPODArray & to_pod, const Def & def, - const size_t size, - const UInt32 out_scale, - const UInt32 def_scale) const + size_t input_rows_count, + UInt32 out_scale, + UInt32 def_scale) const { - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const auto * it = table.find(bit_cast(pod[i])); if (it) @@ -451,7 +445,7 @@ namespace } } - bool executeString(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const + bool executeString(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted, size_t input_rows_count) const { const auto * const in = checkAndGetColumn(in_untyped); if (!in) @@ -459,19 +453,19 @@ namespace const auto & data = in->getChars(); const auto & offsets = in->getOffsets(); - if (!executeStringToString(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const) - && !executeStringToNum>(data, offsets, column_result, default_non_const)) + if (!executeStringToString(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count) + && !executeStringToNum>(data, offsets, column_result, default_non_const, input_rows_count)) { const size_t size = offsets.size(); const auto & table = *cache.table_string_to_idx; @@ -498,14 +492,14 @@ namespace const ColumnString::Chars & data, const ColumnString::Offsets & offsets, IColumn & column_result, - const ColumnPtr default_non_const) const + const ColumnPtr default_non_const, + size_t input_rows_count) const { auto * out = typeid_cast(&column_result); if (!out) return false; auto & out_offs = out->getOffsets(); - const size_t size = offsets.size(); - out_offs.resize(size); + out_offs.resize(input_rows_count); auto & out_chars = out->getChars(); const auto * to_col = assert_cast(cache.to_column.get()); @@ -520,18 +514,18 @@ namespace const auto & def_offs = def->getOffsets(); const auto * def_data = def_chars.data(); auto def_size = def_offs[0]; - executeStringToStringHelper(table, data, offsets, out_chars, out_offs, to_chars, to_offs, def_data, def_size, size); + executeStringToStringHelper(table, data, offsets, out_chars, out_offs, to_chars, to_offs, def_data, def_size, input_rows_count); } else if (default_non_const) { const auto * def = assert_cast(default_non_const.get()); const auto & def_chars = def->getChars(); const auto & def_offs = def->getOffsets(); - executeStringToStringHelper(table, data, offsets, out_chars, out_offs, to_chars, to_offs, def_chars, def_offs, size); + executeStringToStringHelper(table, data, offsets, out_chars, out_offs, to_chars, to_offs, def_chars, def_offs, input_rows_count); } else { - executeStringToStringHelper(table, data, offsets, out_chars, out_offs, to_chars, to_offs, data, offsets, size); + executeStringToStringHelper(table, data, offsets, out_chars, out_offs, to_chars, to_offs, data, offsets, input_rows_count); } return true; } @@ -547,11 +541,11 @@ namespace const ColumnString::Offsets & to_offsets, const DefData & def_data, const DefOffs & def_offsets, - const size_t size) const + size_t input_rows_count) const { ColumnString::Offset current_offset = 0; size_t out_cur_off = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const char8_t * to = nullptr; size_t to_size = 0; @@ -588,26 +582,26 @@ namespace const ColumnString::Chars & data, const ColumnString::Offsets & offsets, IColumn & column_result, - const ColumnPtr default_non_const) const + const ColumnPtr default_non_const, + size_t input_rows_count) const { auto * out = typeid_cast(&column_result); if (!out) return false; auto & out_pod = out->getData(); - const size_t size = offsets.size(); - out_pod.resize(size); + out_pod.resize(input_rows_count); const auto & to_pod = assert_cast(cache.to_column.get())->getData(); const auto & table = *cache.table_string_to_idx; if (cache.default_column) { const auto const_def = assert_cast(cache.default_column.get())->getData()[0]; - executeStringToNumHelper(table, data, offsets, out_pod, to_pod, const_def, size); + executeStringToNumHelper(table, data, offsets, out_pod, to_pod, const_def, input_rows_count); } else { const auto & nconst_def = assert_cast(default_non_const.get())->getData(); - executeStringToNumHelper(table, data, offsets, out_pod, to_pod, nconst_def, size); + executeStringToNumHelper(table, data, offsets, out_pod, to_pod, nconst_def, input_rows_count); } return true; } @@ -620,10 +614,10 @@ namespace PaddedPODArray & out_pod, const PaddedPODArray & to_pod, const Def & def, - const size_t size) const + size_t input_rows_count) const { ColumnString::Offset current_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const StringRef ref{&data[current_offset], offsets[i] - current_offset - 1}; current_offset = offsets[i]; diff --git a/src/Functions/translate.cpp b/src/Functions/translate.cpp index 2df08a5664e..366640d7d20 100644 --- a/src/Functions/translate.cpp +++ b/src/Functions/translate.cpp @@ -52,7 +52,8 @@ struct TranslateImpl const std::string & map_from, const std::string & map_to, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { Map map; fillMapWithValues(map, map_from, map_to); @@ -62,7 +63,7 @@ struct TranslateImpl UInt8 * dst = res_data.data(); - for (UInt64 i = 0; i < offsets.size(); ++i) + for (UInt64 i = 0; i < input_rows_count; ++i) { const UInt8 * src = data.data() + offsets[i - 1]; const UInt8 * src_end = data.data() + offsets[i] - 1; @@ -175,19 +176,20 @@ struct TranslateUTF8Impl const std::string & map_from, const std::string & map_to, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { MapASCII map_ascii; MapUTF8 map; fillMapWithValues(map_ascii, map, map_from, map_to); res_data.resize(data.size()); - res_offsets.resize(offsets.size()); + res_offsets.resize(input_rows_count); UInt8 * dst = res_data.data(); UInt64 data_size = 0; - for (UInt64 i = 0; i < offsets.size(); ++i) + for (UInt64 i = 0; i < input_rows_count; ++i) { const UInt8 * src = data.data() + offsets[i - 1]; const UInt8 * src_end = data.data() + offsets[i] - 1; @@ -311,7 +313,7 @@ public: } } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnPtr column_src = arguments[0].column; const ColumnPtr column_map_from = arguments[1].column; @@ -330,7 +332,7 @@ public: if (const ColumnString * col = checkAndGetColumn(column_src.get())) { auto col_res = ColumnString::create(); - Impl::vector(col->getChars(), col->getOffsets(), map_from, map_to, col_res->getChars(), col_res->getOffsets()); + Impl::vector(col->getChars(), col->getOffsets(), map_from, map_to, col_res->getChars(), col_res->getOffsets(), input_rows_count); return col_res; } else if (const ColumnFixedString * col_fixed = checkAndGetColumn(column_src.get())) diff --git a/src/Functions/trim.cpp b/src/Functions/trim.cpp index 1f0011b8e99..5703e871423 100644 --- a/src/Functions/trim.cpp +++ b/src/Functions/trim.cpp @@ -43,10 +43,10 @@ public: const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { - size_t size = offsets.size(); - res_offsets.resize_exact(size); + res_offsets.resize_exact(input_rows_count); res_data.reserve_exact(data.size()); size_t prev_offset = 0; @@ -55,7 +55,7 @@ public: const UInt8 * start; size_t length; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { execute(reinterpret_cast(&data[prev_offset]), offsets[i] - prev_offset - 1, start, length); @@ -69,7 +69,7 @@ public: } } - static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Functions trimLeft, trimRight and trimBoth cannot work with FixedString argument"); } diff --git a/src/Functions/tupleToNameValuePairs.cpp b/src/Functions/tupleToNameValuePairs.cpp index 998e0da4f0c..92734d3d1fc 100644 --- a/src/Functions/tupleToNameValuePairs.cpp +++ b/src/Functions/tupleToNameValuePairs.cpp @@ -99,16 +99,16 @@ public: return std::make_shared(item_data_type); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IColumn * tuple_col = arguments[0].column.get(); const DataTypeTuple * tuple = checkAndGetDataType(arguments[0].type.get()); - const auto * tuple_col_concrete = assert_cast(tuple_col); + const auto * tuple_col_concrete = assert_cast(tuple_col); auto keys = ColumnString::create(); MutableColumnPtr values = tuple_col_concrete->getColumn(0).cloneEmpty(); auto offsets = ColumnVector::create(); - for (size_t row = 0; row < tuple_col_concrete->size(); ++row) + for (size_t row = 0; row < input_rows_count; ++row) { for (size_t col = 0; col < tuple_col_concrete->tupleSize(); ++col) { diff --git a/src/Functions/visibleWidth.cpp b/src/Functions/visibleWidth.cpp index ebd4a1ff713..3e70418a456 100644 --- a/src/Functions/visibleWidth.cpp +++ b/src/Functions/visibleWidth.cpp @@ -66,9 +66,8 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const auto & src = arguments[0]; - size_t size = input_rows_count; - auto res_col = ColumnUInt64::create(size); + auto res_col = ColumnUInt64::create(input_rows_count); auto & res_data = assert_cast(*res_col).getData(); /// For simplicity reasons, the function is implemented by serializing into temporary buffer. @@ -76,7 +75,7 @@ public: String tmp; FormatSettings format_settings; auto serialization = src.type->getDefaultSerialization(); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { { WriteBufferFromString out(tmp); diff --git a/src/Functions/widthBucket.cpp b/src/Functions/widthBucket.cpp index d007cc968f0..ba24362034f 100644 --- a/src/Functions/widthBucket.cpp +++ b/src/Functions/widthBucket.cpp @@ -166,12 +166,12 @@ class FunctionWidthBucket : public IFunction result_column->reserve(1); auto & result_data = result_column->getData(); - for (const auto row_index : collections::range(0, input_rows_count)) + for (size_t row = 0; row < input_rows_count; ++row) { - const auto operand = getValue(operands_col_const, operands_vec, row_index); - const auto low = getValue(lows_col_const, lows_vec, row_index); - const auto high = getValue(highs_col_const, highs_vec, row_index); - const auto count = getValue(counts_col_const, counts_vec, row_index); + const auto operand = getValue(operands_col_const, operands_vec, row); + const auto low = getValue(lows_col_const, lows_vec, row); + const auto high = getValue(highs_col_const, highs_vec, row); + const auto count = getValue(counts_col_const, counts_vec, row); result_data.push_back(calculate(operand, low, high, count)); } diff --git a/src/IO/Archives/LibArchiveReader.cpp b/src/IO/Archives/LibArchiveReader.cpp index e3fe63fa40d..31bad4d6638 100644 --- a/src/IO/Archives/LibArchiveReader.cpp +++ b/src/IO/Archives/LibArchiveReader.cpp @@ -321,7 +321,7 @@ public: off_t getPosition() override { throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "getPosition not supported when reading from archive"); } String getFileName() const override { return handle.getFileName(); } - size_t getFileSize() override { return handle.getFileInfo().uncompressed_size; } + std::optional tryGetFileSize() override { return handle.getFileInfo().uncompressed_size; } Handle releaseHandle() && { return std::move(handle); } diff --git a/src/IO/Archives/ZipArchiveReader.cpp b/src/IO/Archives/ZipArchiveReader.cpp index 2a9b7a43519..12b07d550c2 100644 --- a/src/IO/Archives/ZipArchiveReader.cpp +++ b/src/IO/Archives/ZipArchiveReader.cpp @@ -317,7 +317,7 @@ public: String getFileName() const override { return handle.getFileName(); } - size_t getFileSize() override { return handle.getFileInfo().uncompressed_size; } + std::optional tryGetFileSize() override { return handle.getFileInfo().uncompressed_size; } /// Releases owned handle to pass it to an enumerator. HandleHolder releaseHandle() && diff --git a/src/IO/AsynchronousReadBufferFromFile.cpp b/src/IO/AsynchronousReadBufferFromFile.cpp index c6fe16a7f14..4d148a5c9f9 100644 --- a/src/IO/AsynchronousReadBufferFromFile.cpp +++ b/src/IO/AsynchronousReadBufferFromFile.cpp @@ -93,7 +93,10 @@ void AsynchronousReadBufferFromFile::close() return; if (0 != ::close(fd)) + { + fd = -1; throw Exception(ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file"); + } fd = -1; } diff --git a/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp b/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp index f8c00d62732..6c4bd09b76f 100644 --- a/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp +++ b/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp @@ -244,7 +244,7 @@ void AsynchronousReadBufferFromFileDescriptor::rewind() file_offset_of_buffer_end = 0; } -size_t AsynchronousReadBufferFromFileDescriptor::getFileSize() +std::optional AsynchronousReadBufferFromFileDescriptor::tryGetFileSize() { return getSizeFromFileDescriptor(fd, getFileName()); } diff --git a/src/IO/AsynchronousReadBufferFromFileDescriptor.h b/src/IO/AsynchronousReadBufferFromFileDescriptor.h index 82659b1aca7..097979fbe00 100644 --- a/src/IO/AsynchronousReadBufferFromFileDescriptor.h +++ b/src/IO/AsynchronousReadBufferFromFileDescriptor.h @@ -68,7 +68,7 @@ public: /// Seek to the beginning, discarding already read data if any. Useful to reread file that changes on every read. void rewind(); - size_t getFileSize() override; + std::optional tryGetFileSize() override; size_t getFileOffsetOfBufferEnd() const override { return file_offset_of_buffer_end; } diff --git a/src/IO/ConcatSeekableReadBuffer.h b/src/IO/ConcatSeekableReadBuffer.h index c8c16c5d887..609f0dc25b8 100644 --- a/src/IO/ConcatSeekableReadBuffer.h +++ b/src/IO/ConcatSeekableReadBuffer.h @@ -21,7 +21,7 @@ public: off_t seek(off_t off, int whence) override; off_t getPosition() override; - size_t getFileSize() override { return total_size; } + std::optional tryGetFileSize() override { return total_size; } private: bool nextImpl() override; diff --git a/src/IO/MMapReadBufferFromFile.cpp b/src/IO/MMapReadBufferFromFile.cpp index d3eb11c920d..d0865269f1b 100644 --- a/src/IO/MMapReadBufferFromFile.cpp +++ b/src/IO/MMapReadBufferFromFile.cpp @@ -77,7 +77,10 @@ void MMapReadBufferFromFile::close() finish(); if (0 != ::close(fd)) + { + fd = -1; throw Exception(ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file"); + } fd = -1; metric_increment.destroy(); diff --git a/src/IO/MMapReadBufferFromFileDescriptor.cpp b/src/IO/MMapReadBufferFromFileDescriptor.cpp index f27828f71b2..83dd192de54 100644 --- a/src/IO/MMapReadBufferFromFileDescriptor.cpp +++ b/src/IO/MMapReadBufferFromFileDescriptor.cpp @@ -87,7 +87,7 @@ off_t MMapReadBufferFromFileDescriptor::seek(off_t offset, int whence) return new_pos; } -size_t MMapReadBufferFromFileDescriptor::getFileSize() +std::optional MMapReadBufferFromFileDescriptor::tryGetFileSize() { return getSizeFromFileDescriptor(getFD(), getFileName()); } diff --git a/src/IO/MMapReadBufferFromFileDescriptor.h b/src/IO/MMapReadBufferFromFileDescriptor.h index f774538374a..de44ec3f9d8 100644 --- a/src/IO/MMapReadBufferFromFileDescriptor.h +++ b/src/IO/MMapReadBufferFromFileDescriptor.h @@ -38,7 +38,7 @@ public: int getFD() const; - size_t getFileSize() override; + std::optional tryGetFileSize() override; size_t readBigAt(char * to, size_t n, size_t offset, const std::function &) const override; bool supportsReadAt() override { return true; } diff --git a/src/IO/MMappedFile.cpp b/src/IO/MMappedFile.cpp index 7249a25decb..26949061274 100644 --- a/src/IO/MMappedFile.cpp +++ b/src/IO/MMappedFile.cpp @@ -69,7 +69,10 @@ void MMappedFile::close() finish(); if (0 != ::close(fd)) + { + fd = -1; throw Exception(ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file"); + } fd = -1; metric_increment.destroy(); diff --git a/src/IO/OpenedFile.cpp b/src/IO/OpenedFile.cpp index 4677a8259db..c51dc430b35 100644 --- a/src/IO/OpenedFile.cpp +++ b/src/IO/OpenedFile.cpp @@ -67,11 +67,13 @@ void OpenedFile::close() return; if (0 != ::close(fd)) + { + fd = -1; throw Exception(ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file"); + } fd = -1; metric_increment.destroy(); } } - diff --git a/src/IO/ParallelReadBuffer.cpp b/src/IO/ParallelReadBuffer.cpp index e6771235a8e..89cff670e37 100644 --- a/src/IO/ParallelReadBuffer.cpp +++ b/src/IO/ParallelReadBuffer.cpp @@ -152,7 +152,7 @@ off_t ParallelReadBuffer::seek(off_t offset, int whence) return offset; } -size_t ParallelReadBuffer::getFileSize() +std::optional ParallelReadBuffer::tryGetFileSize() { return file_size; } diff --git a/src/IO/ParallelReadBuffer.h b/src/IO/ParallelReadBuffer.h index cfeec2b3677..8852472a8bc 100644 --- a/src/IO/ParallelReadBuffer.h +++ b/src/IO/ParallelReadBuffer.h @@ -33,7 +33,7 @@ public: ~ParallelReadBuffer() override { finishAndWait(); } off_t seek(off_t off, int whence) override; - size_t getFileSize() override; + std::optional tryGetFileSize() override; off_t getPosition() override; const SeekableReadBuffer & getReadBuffer() const { return input; } diff --git a/src/IO/ReadBufferFromEmptyFile.h b/src/IO/ReadBufferFromEmptyFile.h index f21f2f507dc..7808ef62fd9 100644 --- a/src/IO/ReadBufferFromEmptyFile.h +++ b/src/IO/ReadBufferFromEmptyFile.h @@ -19,7 +19,8 @@ private: std::string getFileName() const override { return ""; } off_t seek(off_t /*off*/, int /*whence*/) override { return 0; } off_t getPosition() override { return 0; } - size_t getFileSize() override { return 0; } + std::optional tryGetFileSize() override { return 0; } + size_t getFileOffsetOfBufferEnd() const override { return 0; } }; } diff --git a/src/IO/ReadBufferFromEncryptedFile.h b/src/IO/ReadBufferFromEncryptedFile.h index 3626daccb3e..213d242bb91 100644 --- a/src/IO/ReadBufferFromEncryptedFile.h +++ b/src/IO/ReadBufferFromEncryptedFile.h @@ -30,7 +30,7 @@ public: void setReadUntilEnd() override { in->setReadUntilEnd(); } - size_t getFileSize() override { return in->getFileSize(); } + std::optional tryGetFileSize() override { return in->tryGetFileSize(); } private: bool nextImpl() override; diff --git a/src/IO/ReadBufferFromFile.cpp b/src/IO/ReadBufferFromFile.cpp index cb987171bad..2e5b9fcf753 100644 --- a/src/IO/ReadBufferFromFile.cpp +++ b/src/IO/ReadBufferFromFile.cpp @@ -88,7 +88,10 @@ void ReadBufferFromFile::close() return; if (0 != ::close(fd)) + { + fd = -1; throw Exception(ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file"); + } fd = -1; metric_increment.destroy(); diff --git a/src/IO/ReadBufferFromFileBase.cpp b/src/IO/ReadBufferFromFileBase.cpp index 4ac3f984f78..b7a1438cff8 100644 --- a/src/IO/ReadBufferFromFileBase.cpp +++ b/src/IO/ReadBufferFromFileBase.cpp @@ -5,11 +5,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int UNKNOWN_FILE_SIZE; -} - ReadBufferFromFileBase::ReadBufferFromFileBase() : BufferWithOwnMemory(0) { } @@ -26,11 +21,9 @@ ReadBufferFromFileBase::ReadBufferFromFileBase( ReadBufferFromFileBase::~ReadBufferFromFileBase() = default; -size_t ReadBufferFromFileBase::getFileSize() +std::optional ReadBufferFromFileBase::tryGetFileSize() { - if (file_size) - return *file_size; - throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size for read buffer"); + return file_size; } void ReadBufferFromFileBase::setProgressCallback(ContextPtr context) diff --git a/src/IO/ReadBufferFromFileBase.h b/src/IO/ReadBufferFromFileBase.h index 9870d8bbe43..c98dcd5a93e 100644 --- a/src/IO/ReadBufferFromFileBase.h +++ b/src/IO/ReadBufferFromFileBase.h @@ -50,7 +50,7 @@ public: clock_type = clock_type_; } - size_t getFileSize() override; + std::optional tryGetFileSize() override; void setProgressCallback(ContextPtr context); diff --git a/src/IO/ReadBufferFromFileDecorator.cpp b/src/IO/ReadBufferFromFileDecorator.cpp index 9ac0fb4e475..8a6468b9bd0 100644 --- a/src/IO/ReadBufferFromFileDecorator.cpp +++ b/src/IO/ReadBufferFromFileDecorator.cpp @@ -52,9 +52,9 @@ bool ReadBufferFromFileDecorator::nextImpl() return result; } -size_t ReadBufferFromFileDecorator::getFileSize() +std::optional ReadBufferFromFileDecorator::tryGetFileSize() { - return getFileSizeFromReadBuffer(*impl); + return tryGetFileSizeFromReadBuffer(*impl); } } diff --git a/src/IO/ReadBufferFromFileDecorator.h b/src/IO/ReadBufferFromFileDecorator.h index 6e62c7f741b..69f029c5cf7 100644 --- a/src/IO/ReadBufferFromFileDecorator.h +++ b/src/IO/ReadBufferFromFileDecorator.h @@ -27,7 +27,7 @@ public: ReadBuffer & getWrappedReadBuffer() { return *impl; } - size_t getFileSize() override; + std::optional tryGetFileSize() override; protected: std::unique_ptr impl; diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp index 76a80f145e7..51a1a5d8d93 100644 --- a/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/src/IO/ReadBufferFromFileDescriptor.cpp @@ -253,7 +253,7 @@ void ReadBufferFromFileDescriptor::rewind() file_offset_of_buffer_end = 0; } -size_t ReadBufferFromFileDescriptor::getFileSize() +std::optional ReadBufferFromFileDescriptor::tryGetFileSize() { return getSizeFromFileDescriptor(fd, getFileName()); } diff --git a/src/IO/ReadBufferFromFileDescriptor.h b/src/IO/ReadBufferFromFileDescriptor.h index db256ef91c7..6083e744c95 100644 --- a/src/IO/ReadBufferFromFileDescriptor.h +++ b/src/IO/ReadBufferFromFileDescriptor.h @@ -69,7 +69,7 @@ public: /// Seek to the beginning, discarding already read data if any. Useful to reread file that changes on every read. void rewind(); - size_t getFileSize() override; + std::optional tryGetFileSize() override; bool checkIfActuallySeekable() override; diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index bf97e959ee0..6972bae64b4 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -311,15 +311,15 @@ off_t ReadBufferFromS3::seek(off_t offset_, int whence) return offset; } -size_t ReadBufferFromS3::getFileSize() +std::optional ReadBufferFromS3::tryGetFileSize() { if (file_size) - return *file_size; + return file_size; auto object_size = S3::getObjectSize(*client_ptr, bucket, key, version_id); file_size = object_size; - return *file_size; + return file_size; } off_t ReadBufferFromS3::getPosition() diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index c6625c2d632..ff04f78ce7b 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -63,7 +63,7 @@ public: off_t getPosition() override; - size_t getFileSize() override; + std::optional tryGetFileSize() override; void setReadUntilPosition(size_t position) override; void setReadUntilEnd() override; diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index b753e66da48..4b2e6580f9b 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -72,7 +72,6 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int CANNOT_SEEK_THROUGH_FILE; extern const int SEEK_POSITION_OUT_OF_BOUND; - extern const int UNKNOWN_FILE_SIZE; } std::unique_ptr ReadWriteBufferFromHTTP::CallResult::transformToReadBuffer(size_t buf_size) && @@ -121,15 +120,33 @@ void ReadWriteBufferFromHTTP::prepareRequest(Poco::Net::HTTPRequest & request, s credentials.authenticate(request); } -size_t ReadWriteBufferFromHTTP::getFileSize() +std::optional ReadWriteBufferFromHTTP::tryGetFileSize() { if (!file_info) - file_info = getFileInfo(); + { + try + { + file_info = getFileInfo(); + } + catch (const HTTPException &) + { + return std::nullopt; + } + catch (const NetException &) + { + return std::nullopt; + } + catch (const Poco::Net::NetException &) + { + return std::nullopt; + } + catch (const Poco::IOException &) + { + return std::nullopt; + } + } - if (file_info->file_size) - return *file_info->file_size; - - throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size for: {}", initial_uri.toString()); + return file_info->file_size; } bool ReadWriteBufferFromHTTP::supportsReadAt() @@ -311,12 +328,12 @@ void ReadWriteBufferFromHTTP::doWithRetries(std::function && callable, error_message = e.displayText(); exception = std::current_exception(); } - catch (DB::NetException & e) + catch (NetException & e) { error_message = e.displayText(); exception = std::current_exception(); } - catch (DB::HTTPException & e) + catch (HTTPException & e) { if (!isRetriableError(e.getHTTPStatus())) is_retriable = false; @@ -324,7 +341,7 @@ void ReadWriteBufferFromHTTP::doWithRetries(std::function && callable, error_message = e.displayText(); exception = std::current_exception(); } - catch (DB::Exception & e) + catch (Exception & e) { is_retriable = false; @@ -683,7 +700,19 @@ std::optional ReadWriteBufferFromHTTP::tryGetLastModificationTime() { file_info = getFileInfo(); } - catch (...) + catch (const HTTPException &) + { + return std::nullopt; + } + catch (const NetException &) + { + return std::nullopt; + } + catch (const Poco::Net::NetException &) + { + return std::nullopt; + } + catch (const Poco::IOException &) { return std::nullopt; } @@ -704,7 +733,7 @@ ReadWriteBufferFromHTTP::HTTPFileInfo ReadWriteBufferFromHTTP::getFileInfo() { getHeadResponse(response); } - catch (HTTPException & e) + catch (const HTTPException & e) { /// Maybe the web server doesn't support HEAD requests. /// E.g. webhdfs reports status 400. diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index f496fe3ddcd..1c9bda53008 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -118,7 +118,7 @@ private: std::unique_ptr initialize(); - size_t getFileSize() override; + std::optional tryGetFileSize() override; bool supportsReadAt() override; diff --git a/src/IO/WithFileSize.cpp b/src/IO/WithFileSize.cpp index 3660d962c08..54747cef8af 100644 --- a/src/IO/WithFileSize.cpp +++ b/src/IO/WithFileSize.cpp @@ -5,6 +5,7 @@ #include #include + namespace DB { @@ -13,41 +14,47 @@ namespace ErrorCodes extern const int UNKNOWN_FILE_SIZE; } -template -static size_t getFileSize(T & in) +size_t WithFileSize::getFileSize() { - if (auto * with_file_size = dynamic_cast(&in)) - { - return with_file_size->getFileSize(); - } + if (auto maybe_size = tryGetFileSize()) + return *maybe_size; throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size"); } -size_t getFileSizeFromReadBuffer(ReadBuffer & in) +template +static std::optional tryGetFileSize(T & in) { - if (auto * delegate = dynamic_cast(&in)) - { - return getFileSize(delegate->getWrappedReadBuffer()); - } - else if (auto * compressed = dynamic_cast(&in)) - { - return getFileSize(compressed->getWrappedReadBuffer()); - } + if (auto * with_file_size = dynamic_cast(&in)) + return with_file_size->tryGetFileSize(); - return getFileSize(in); + return std::nullopt; +} + +template +static size_t getFileSize(T & in) +{ + if (auto maybe_size = tryGetFileSize(in)) + return *maybe_size; + + throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size"); } std::optional tryGetFileSizeFromReadBuffer(ReadBuffer & in) { - try - { - return getFileSizeFromReadBuffer(in); - } - catch (...) - { - return std::nullopt; - } + if (auto * delegate = dynamic_cast(&in)) + return tryGetFileSize(delegate->getWrappedReadBuffer()); + else if (auto * compressed = dynamic_cast(&in)) + return tryGetFileSize(compressed->getWrappedReadBuffer()); + return tryGetFileSize(in); +} + +size_t getFileSizeFromReadBuffer(ReadBuffer & in) +{ + if (auto maybe_size = tryGetFileSizeFromReadBuffer(in)) + return *maybe_size; + + throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size"); } bool isBufferWithFileSize(const ReadBuffer & in) @@ -82,5 +89,4 @@ size_t getDataOffsetMaybeCompressed(const ReadBuffer & in) return in.count(); } - } diff --git a/src/IO/WithFileSize.h b/src/IO/WithFileSize.h index 0ae3af98ea0..e5dc383fab0 100644 --- a/src/IO/WithFileSize.h +++ b/src/IO/WithFileSize.h @@ -10,15 +10,16 @@ class ReadBuffer; class WithFileSize { public: - virtual size_t getFileSize() = 0; + /// Returns nullopt if couldn't find out file size; + virtual std::optional tryGetFileSize() = 0; virtual ~WithFileSize() = default; + + size_t getFileSize(); }; bool isBufferWithFileSize(const ReadBuffer & in); size_t getFileSizeFromReadBuffer(ReadBuffer & in); - -/// Return nullopt if couldn't find out file size; std::optional tryGetFileSizeFromReadBuffer(ReadBuffer & in); size_t getDataOffsetMaybeCompressed(const ReadBuffer & in); diff --git a/src/IO/WriteBufferFromFile.cpp b/src/IO/WriteBufferFromFile.cpp index 37b1161356f..f1825ce1e22 100644 --- a/src/IO/WriteBufferFromFile.cpp +++ b/src/IO/WriteBufferFromFile.cpp @@ -116,7 +116,10 @@ void WriteBufferFromFile::close() finalize(); if (0 != ::close(fd)) + { + fd = -1; throw Exception(ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file"); + } fd = -1; metric_increment.destroy(); diff --git a/src/IO/examples/CMakeLists.txt b/src/IO/examples/CMakeLists.txt index fc9d9c7dcd1..bfd171a3d00 100644 --- a/src/IO/examples/CMakeLists.txt +++ b/src/IO/examples/CMakeLists.txt @@ -1,77 +1,77 @@ clickhouse_add_executable (read_buffer read_buffer.cpp) -target_link_libraries (read_buffer PRIVATE clickhouse_common_io) +target_link_libraries (read_buffer PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (read_buffer_perf read_buffer_perf.cpp) -target_link_libraries (read_buffer_perf PRIVATE clickhouse_common_io) +target_link_libraries (read_buffer_perf PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (read_float_perf read_float_perf.cpp) -target_link_libraries (read_float_perf PRIVATE clickhouse_common_io) +target_link_libraries (read_float_perf PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (write_buffer write_buffer.cpp) -target_link_libraries (write_buffer PRIVATE clickhouse_common_io) +target_link_libraries (write_buffer PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (write_buffer_perf write_buffer_perf.cpp) -target_link_libraries (write_buffer_perf PRIVATE clickhouse_common_io) +target_link_libraries (write_buffer_perf PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (valid_utf8_perf valid_utf8_perf.cpp) -target_link_libraries (valid_utf8_perf PRIVATE clickhouse_common_io) +target_link_libraries (valid_utf8_perf PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (valid_utf8 valid_utf8.cpp) -target_link_libraries (valid_utf8 PRIVATE clickhouse_common_io) +target_link_libraries (valid_utf8 PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (var_uint var_uint.cpp) -target_link_libraries (var_uint PRIVATE clickhouse_common_io) +target_link_libraries (var_uint PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (read_escaped_string read_escaped_string.cpp) -target_link_libraries (read_escaped_string PRIVATE clickhouse_common_io) +target_link_libraries (read_escaped_string PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (parse_int_perf parse_int_perf.cpp) -target_link_libraries (parse_int_perf PRIVATE clickhouse_common_io) +target_link_libraries (parse_int_perf PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (parse_int_perf2 parse_int_perf2.cpp) -target_link_libraries (parse_int_perf2 PRIVATE clickhouse_common_io) +target_link_libraries (parse_int_perf2 PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (read_write_int read_write_int.cpp) -target_link_libraries (read_write_int PRIVATE clickhouse_common_io) +target_link_libraries (read_write_int PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (o_direct_and_dirty_pages o_direct_and_dirty_pages.cpp) -target_link_libraries (o_direct_and_dirty_pages PRIVATE clickhouse_common_io) +target_link_libraries (o_direct_and_dirty_pages PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (io_operators io_operators.cpp) -target_link_libraries (io_operators PRIVATE clickhouse_common_io) +target_link_libraries (io_operators PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (write_int write_int.cpp) -target_link_libraries (write_int PRIVATE clickhouse_common_io) +target_link_libraries (write_int PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (zlib_buffers zlib_buffers.cpp) -target_link_libraries (zlib_buffers PRIVATE clickhouse_common_io) +target_link_libraries (zlib_buffers PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (lzma_buffers lzma_buffers.cpp) -target_link_libraries (lzma_buffers PRIVATE clickhouse_common_io) +target_link_libraries (lzma_buffers PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (limit_read_buffer limit_read_buffer.cpp) -target_link_libraries (limit_read_buffer PRIVATE clickhouse_common_io) +target_link_libraries (limit_read_buffer PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (limit_read_buffer2 limit_read_buffer2.cpp) -target_link_libraries (limit_read_buffer2 PRIVATE clickhouse_common_io) +target_link_libraries (limit_read_buffer2 PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (parse_date_time_best_effort parse_date_time_best_effort.cpp) -target_link_libraries (parse_date_time_best_effort PRIVATE clickhouse_common_io) +target_link_libraries (parse_date_time_best_effort PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (zlib_ng_bug zlib_ng_bug.cpp) -target_link_libraries (zlib_ng_bug PRIVATE ch_contrib::zlib clickhouse_common_io) +target_link_libraries (zlib_ng_bug PRIVATE ch_contrib::zlib clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (dragonbox_test dragonbox_test.cpp) -target_link_libraries (dragonbox_test PRIVATE ch_contrib::dragonbox_to_chars clickhouse_common_io) +target_link_libraries (dragonbox_test PRIVATE ch_contrib::dragonbox_to_chars clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (zstd_buffers zstd_buffers.cpp) -target_link_libraries (zstd_buffers PRIVATE clickhouse_common_io) +target_link_libraries (zstd_buffers PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (snappy_read_buffer snappy_read_buffer.cpp) -target_link_libraries (snappy_read_buffer PRIVATE clickhouse_common_io) +target_link_libraries (snappy_read_buffer PRIVATE clickhouse_common_io clickhouse_common_config) clickhouse_add_executable (hadoop_snappy_read_buffer hadoop_snappy_read_buffer.cpp) -target_link_libraries (hadoop_snappy_read_buffer PRIVATE clickhouse_common_io) +target_link_libraries (hadoop_snappy_read_buffer PRIVATE clickhouse_common_io clickhouse_common_config) if (TARGET ch_contrib::hdfs) clickhouse_add_executable (read_buffer_from_hdfs read_buffer_from_hdfs.cpp) diff --git a/src/IO/tests/gtest_archive_reader_and_writer.cpp b/src/IO/tests/gtest_archive_reader_and_writer.cpp index 898c7017e7d..06f8f53546b 100644 --- a/src/IO/tests/gtest_archive_reader_and_writer.cpp +++ b/src/IO/tests/gtest_archive_reader_and_writer.cpp @@ -492,48 +492,6 @@ TEST_P(ArchiveReaderAndWriterTest, ManyFilesOnDisk) } } -TEST_P(ArchiveReaderAndWriterTest, LargeFile) -{ - /// Make an archive. - std::string_view contents = "The contents of a.txt\n"; - int times = 10000000; - { - auto writer = createArchiveWriter(getPathToArchive()); - { - auto out = writer->writeFile("a.txt", times * contents.size()); - for (int i = 0; i < times; i++) - writeString(contents, *out); - out->finalize(); - } - writer->finalize(); - } - - /// Read the archive. - auto reader = createArchiveReader(getPathToArchive()); - - ASSERT_TRUE(reader->fileExists("a.txt")); - - auto file_info = reader->getFileInfo("a.txt"); - EXPECT_EQ(file_info.uncompressed_size, contents.size() * times); - EXPECT_GT(file_info.compressed_size, 0); - - { - auto in = reader->readFile("a.txt", /*throw_on_not_found=*/true); - for (int i = 0; i < times; i++) - ASSERT_TRUE(checkString(String(contents), *in)); - } - - { - /// Use an enumerator. - auto enumerator = reader->firstFile(); - ASSERT_NE(enumerator, nullptr); - EXPECT_EQ(enumerator->getFileName(), "a.txt"); - EXPECT_EQ(enumerator->getFileInfo().uncompressed_size, contents.size() * times); - EXPECT_GT(enumerator->getFileInfo().compressed_size, 0); - EXPECT_FALSE(enumerator->nextFile()); - } -} - TEST(TarArchiveReaderTest, FileExists) { String archive_path = "archive.tar"; diff --git a/src/IO/tests/gtest_memory_resize.cpp b/src/IO/tests/gtest_memory_resize.cpp index d760a948075..c3b34c352b2 100644 --- a/src/IO/tests/gtest_memory_resize.cpp +++ b/src/IO/tests/gtest_memory_resize.cpp @@ -134,7 +134,7 @@ TEST(MemoryResizeTest, SmallInitAndBigResizeOverflowWhenPadding) ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1); ASSERT_EQ(memory.m_size, 0x8000000000000000ULL - PADDING_FOR_SIMD); -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL - (PADDING_FOR_SIMD - 1)), Exception, ErrorCodes::LOGICAL_ERROR); ASSERT_TRUE(memory.m_data); // state is intact after exception ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1); @@ -158,7 +158,7 @@ TEST(MemoryResizeTest, SmallInitAndBigResizeOverflowWhenPadding) ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL - (PADDING_FOR_SIMD - 1)), Exception, ErrorCodes::LOGICAL_ERROR); ASSERT_TRUE(memory.m_data); // state is intact after exception ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); @@ -197,7 +197,7 @@ TEST(MemoryResizeTest, BigInitAndSmallResizeOverflowWhenPadding) , ErrorCodes::ARGUMENT_OUT_OF_BOUND); } -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD { EXPECT_THROW_ERROR_CODE( { diff --git a/src/IO/tests/gtest_writebuffer_s3.cpp b/src/IO/tests/gtest_writebuffer_s3.cpp index 3c1af6538ad..b53a8b58023 100644 --- a/src/IO/tests/gtest_writebuffer_s3.cpp +++ b/src/IO/tests/gtest_writebuffer_s3.cpp @@ -917,8 +917,8 @@ TEST_P(SyncAsync, ExceptionOnUploadPart) { TEST_F(WBS3Test, PrefinalizeCalledMultipleTimes) { -#ifdef ABORT_ON_LOGICAL_ERROR - GTEST_SKIP() << "this test trigger LOGICAL_ERROR, runs only if ABORT_ON_LOGICAL_ERROR is not defined"; +#ifdef DEBUG_OR_SANITIZER_BUILD + GTEST_SKIP() << "this test trigger LOGICAL_ERROR, runs only if DEBUG_OR_SANITIZER_BUILD is not defined"; #else EXPECT_THROW({ try { diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 34f3e0a98bd..4aaecc491e0 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -624,9 +624,9 @@ void ActionsDAG::removeAliasesForFilter(const std::string & filter_name) } } -ActionsDAGPtr ActionsDAG::cloneSubDAG(const NodeRawConstPtrs & outputs, bool remove_aliases) +ActionsDAG ActionsDAG::cloneSubDAG(const NodeRawConstPtrs & outputs, bool remove_aliases) { - auto actions = std::make_shared(); + ActionsDAG actions; std::unordered_map copy_map; struct Frame @@ -661,21 +661,21 @@ ActionsDAGPtr ActionsDAG::cloneSubDAG(const NodeRawConstPtrs & outputs, bool rem if (remove_aliases && frame.node->type == ActionType::ALIAS) copy_node = copy_map[frame.node->children.front()]; else - copy_node = &actions->nodes.emplace_back(*frame.node); + copy_node = &actions.nodes.emplace_back(*frame.node); if (frame.node->type == ActionType::INPUT) - actions->inputs.push_back(copy_node); + actions.inputs.push_back(copy_node); stack.pop(); } } - for (auto & node : actions->nodes) + for (auto & node : actions.nodes) for (auto & child : node.children) child = copy_map[child]; for (const auto * output : outputs) - actions->outputs.push_back(copy_map[output]); + actions.outputs.push_back(copy_map[output]); return actions; } @@ -961,9 +961,9 @@ NameSet ActionsDAG::foldActionsByProjection( } -ActionsDAGPtr ActionsDAG::foldActionsByProjection(const std::unordered_map & new_inputs, const NodeRawConstPtrs & required_outputs) +ActionsDAG ActionsDAG::foldActionsByProjection(const std::unordered_map & new_inputs, const NodeRawConstPtrs & required_outputs) { - auto dag = std::make_unique(); + ActionsDAG dag; std::unordered_map inputs_mapping; std::unordered_map mapping; struct Frame @@ -1003,9 +1003,9 @@ ActionsDAGPtr ActionsDAG::foldActionsByProjection(const std::unordered_mapresult_name != rename->result_name; const auto & input_name = should_rename ? rename->result_name : new_input->result_name; - mapped_input = &dag->addInput(input_name, new_input->result_type); + mapped_input = &dag.addInput(input_name, new_input->result_type); if (should_rename) - mapped_input = &dag->addAlias(*mapped_input, new_input->result_name); + mapped_input = &dag.addAlias(*mapped_input, new_input->result_name); } node = mapped_input; @@ -1034,7 +1034,7 @@ ActionsDAGPtr ActionsDAG::foldActionsByProjection(const std::unordered_mapresult_name, frame.node->result_name); - auto & node = dag->nodes.emplace_back(*frame.node); + auto & node = dag.nodes.emplace_back(*frame.node); for (auto & child : node.children) child = mapping[child]; @@ -1049,8 +1049,8 @@ ActionsDAGPtr ActionsDAG::foldActionsByProjection(const std::unordered_mapresult_name != mapped_output->result_name) - mapped_output = &dag->addAlias(*mapped_output, output->result_name); - dag->outputs.push_back(mapped_output); + mapped_output = &dag.addAlias(*mapped_output, output->result_name); + dag.outputs.push_back(mapped_output); } return dag; @@ -1246,27 +1246,31 @@ bool ActionsDAG::removeUnusedResult(const std::string & column_name) return true; } -ActionsDAGPtr ActionsDAG::clone() const +ActionsDAG ActionsDAG::clone() const { - auto actions = std::make_shared(); + std::unordered_map old_to_new_nodes; + return clone(old_to_new_nodes); +} - std::unordered_map copy_map; +ActionsDAG ActionsDAG::clone(std::unordered_map & old_to_new_nodes) const +{ + ActionsDAG actions; for (const auto & node : nodes) { - auto & copy_node = actions->nodes.emplace_back(node); - copy_map[&node] = ©_node; + auto & copy_node = actions.nodes.emplace_back(node); + old_to_new_nodes[&node] = ©_node; } - for (auto & node : actions->nodes) + for (auto & node : actions.nodes) for (auto & child : node.children) - child = copy_map[child]; + child = old_to_new_nodes[child]; for (const auto & output_node : outputs) - actions->outputs.push_back(copy_map[output_node]); + actions.outputs.push_back(old_to_new_nodes[output_node]); for (const auto & input_node : inputs) - actions->inputs.push_back(copy_map[input_node]); + actions.inputs.push_back(old_to_new_nodes[input_node]); return actions; } @@ -1404,7 +1408,7 @@ const ActionsDAG::Node & ActionsDAG::materializeNode(const Node & node) return addAlias(*func, name); } -ActionsDAGPtr ActionsDAG::makeConvertingActions( +ActionsDAG ActionsDAG::makeConvertingActions( const ColumnsWithTypeAndName & source, const ColumnsWithTypeAndName & result, MatchColumnsMode mode, @@ -1421,7 +1425,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( if (add_casted_columns && mode != MatchColumnsMode::Name) throw Exception(ErrorCodes::LOGICAL_ERROR, "Converting with add_casted_columns supported only for MatchColumnsMode::Name"); - auto actions_dag = std::make_shared(source); + ActionsDAG actions_dag(source); NodeRawConstPtrs projection(num_result_columns); FunctionOverloadResolverPtr func_builder_materialize = std::make_unique(std::make_shared()); @@ -1429,9 +1433,9 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( std::unordered_map> inputs; if (mode == MatchColumnsMode::Name) { - size_t input_nodes_size = actions_dag->inputs.size(); + size_t input_nodes_size = actions_dag.inputs.size(); for (size_t pos = 0; pos < input_nodes_size; ++pos) - inputs[actions_dag->inputs[pos]->result_name].push_back(pos); + inputs[actions_dag.inputs[pos]->result_name].push_back(pos); } for (size_t result_col_num = 0; result_col_num < num_result_columns; ++result_col_num) @@ -1444,7 +1448,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( { case MatchColumnsMode::Position: { - src_node = dst_node = actions_dag->inputs[result_col_num]; + src_node = dst_node = actions_dag.inputs[result_col_num]; break; } @@ -1455,7 +1459,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( { const auto * res_const = typeid_cast(res_elem.column.get()); if (ignore_constant_values && res_const) - src_node = dst_node = &actions_dag->addColumn(res_elem); + src_node = dst_node = &actions_dag.addColumn(res_elem); else throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Cannot find column `{}` in source stream, there are only columns: [{}]", @@ -1463,7 +1467,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( } else { - src_node = dst_node = actions_dag->inputs[input.front()]; + src_node = dst_node = actions_dag.inputs[input.front()]; input.pop_front(); } break; @@ -1476,7 +1480,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( if (const auto * src_const = typeid_cast(dst_node->column.get())) { if (ignore_constant_values) - dst_node = &actions_dag->addColumn(res_elem); + dst_node = &actions_dag.addColumn(res_elem); else if (res_const->getField() != src_const->getField()) throw Exception( ErrorCodes::ILLEGAL_COLUMN, @@ -1498,7 +1502,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( column.column = DataTypeString().createColumnConst(0, column.name); column.type = std::make_shared(); - const auto * right_arg = &actions_dag->addColumn(std::move(column)); + const auto * right_arg = &actions_dag.addColumn(std::move(column)); const auto * left_arg = dst_node; CastDiagnostic diagnostic = {dst_node->result_name, res_elem.name}; @@ -1506,13 +1510,13 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( = createInternalCastOverloadResolver(CastType::nonAccurate, std::move(diagnostic)); NodeRawConstPtrs children = { left_arg, right_arg }; - dst_node = &actions_dag->addFunction(func_builder_cast, std::move(children), {}); + dst_node = &actions_dag.addFunction(func_builder_cast, std::move(children), {}); } if (dst_node->column && isColumnConst(*dst_node->column) && !(res_elem.column && isColumnConst(*res_elem.column))) { NodeRawConstPtrs children = {dst_node}; - dst_node = &actions_dag->addFunction(func_builder_materialize, std::move(children), {}); + dst_node = &actions_dag.addFunction(func_builder_materialize, std::move(children), {}); } if (dst_node->result_name != res_elem.name) @@ -1531,7 +1535,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( } else { - dst_node = &actions_dag->addAlias(*dst_node, res_elem.name); + dst_node = &actions_dag.addAlias(*dst_node, res_elem.name); projection[result_col_num] = dst_node; } } @@ -1541,36 +1545,36 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( } } - actions_dag->outputs.swap(projection); - actions_dag->removeUnusedActions(false); + actions_dag.outputs.swap(projection); + actions_dag.removeUnusedActions(false); return actions_dag; } -ActionsDAGPtr ActionsDAG::makeAddingColumnActions(ColumnWithTypeAndName column) +ActionsDAG ActionsDAG::makeAddingColumnActions(ColumnWithTypeAndName column) { - auto adding_column_action = std::make_shared(); + ActionsDAG adding_column_action; FunctionOverloadResolverPtr func_builder_materialize = std::make_unique(std::make_shared()); auto column_name = column.name; - const auto * column_node = &adding_column_action->addColumn(std::move(column)); + const auto * column_node = &adding_column_action.addColumn(std::move(column)); NodeRawConstPtrs inputs = {column_node}; - const auto & function_node = adding_column_action->addFunction(func_builder_materialize, std::move(inputs), {}); - const auto & alias_node = adding_column_action->addAlias(function_node, std::move(column_name)); + const auto & function_node = adding_column_action.addFunction(func_builder_materialize, std::move(inputs), {}); + const auto & alias_node = adding_column_action.addAlias(function_node, std::move(column_name)); - adding_column_action->outputs.push_back(&alias_node); + adding_column_action.outputs.push_back(&alias_node); return adding_column_action; } -ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second) +ActionsDAG ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second) { first.mergeInplace(std::move(second)); /// Some actions could become unused. Do not drop inputs to preserve the header. first.removeUnusedActions(false); - return std::make_shared(std::move(first)); + return std::move(first); } void ActionsDAG::mergeInplace(ActionsDAG && second) @@ -1963,15 +1967,15 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set split second_inputs.push_back(cur.to_second); } - auto first_actions = std::make_shared(); - first_actions->nodes.swap(first_nodes); - first_actions->outputs.swap(first_outputs); - first_actions->inputs.swap(first_inputs); + ActionsDAG first_actions; + first_actions.nodes.swap(first_nodes); + first_actions.outputs.swap(first_outputs); + first_actions.inputs.swap(first_inputs); - auto second_actions = std::make_shared(); - second_actions->nodes.swap(second_nodes); - second_actions->outputs.swap(second_outputs); - second_actions->inputs.swap(second_inputs); + ActionsDAG second_actions; + second_actions.nodes.swap(second_nodes); + second_actions.outputs.swap(second_outputs); + second_actions.inputs.swap(second_inputs); std::unordered_map split_nodes_mapping; if (create_split_nodes_mapping) @@ -2091,7 +2095,7 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBySortingDescription(const NameS return res; } -bool ActionsDAG::isFilterAlwaysFalseForDefaultValueInputs(const std::string & filter_name, const Block & input_stream_header) +bool ActionsDAG::isFilterAlwaysFalseForDefaultValueInputs(const std::string & filter_name, const Block & input_stream_header) const { const auto * filter_node = tryFindInOutputs(filter_name); if (!filter_node) @@ -2115,7 +2119,7 @@ bool ActionsDAG::isFilterAlwaysFalseForDefaultValueInputs(const std::string & fi input_node_name_to_default_input_column.emplace(input->result_name, std::move(constant_column_with_type_and_name)); } - ActionsDAGPtr filter_with_default_value_inputs; + std::optional filter_with_default_value_inputs; try { @@ -2297,12 +2301,12 @@ ColumnsWithTypeAndName prepareFunctionArguments(const ActionsDAG::NodeRawConstPt /// /// Result actions add single column with conjunction result (it is always first in outputs). /// No other columns are added or removed. -ActionsDAGPtr ActionsDAG::createActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs) +std::optional ActionsDAG::createActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs) { if (conjunction.empty()) - return nullptr; + return {}; - auto actions = std::make_shared(); + ActionsDAG actions; FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); @@ -2343,7 +2347,7 @@ ActionsDAGPtr ActionsDAG::createActionsForConjunction(NodeRawConstPtrs conjuncti if (cur.next_child_to_visit == cur.node->children.size()) { - auto & node = actions->nodes.emplace_back(*cur.node); + auto & node = actions.nodes.emplace_back(*cur.node); nodes_mapping[cur.node] = &node; for (auto & child : node.children) @@ -2366,33 +2370,33 @@ ActionsDAGPtr ActionsDAG::createActionsForConjunction(NodeRawConstPtrs conjuncti for (const auto * predicate : conjunction) args.emplace_back(nodes_mapping[predicate]); - result_predicate = &actions->addFunction(func_builder_and, std::move(args), {}); + result_predicate = &actions.addFunction(func_builder_and, std::move(args), {}); } - actions->outputs.push_back(result_predicate); + actions.outputs.push_back(result_predicate); for (const auto & col : all_inputs) { const Node * input; auto & list = required_inputs[col.name]; if (list.empty()) - input = &actions->addInput(col); + input = &actions.addInput(col); else { input = list.front(); list.pop_front(); - actions->inputs.push_back(input); + actions.inputs.push_back(input); } /// We should not add result_predicate into the outputs for the second time. if (input->result_name != result_predicate->result_name) - actions->outputs.push_back(input); + actions.outputs.push_back(input); } return actions; } -ActionsDAGPtr ActionsDAG::splitActionsForFilterPushDown( +std::optional ActionsDAG::splitActionsForFilterPushDown( const std::string & filter_name, bool removes_filter, const Names & available_inputs, @@ -2408,7 +2412,7 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilterPushDown( /// If condition is constant let's do nothing. /// It means there is nothing to push down or optimization was already applied. if (predicate->type == ActionType::COLUMN) - return nullptr; + return {}; std::unordered_set allowed_nodes; @@ -2432,7 +2436,7 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilterPushDown( auto conjunction = getConjunctionNodes(predicate, allowed_nodes); if (conjunction.allowed.empty()) - return nullptr; + return {}; chassert(predicate->result_type); @@ -2444,13 +2448,13 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilterPushDown( && !conjunction.rejected.front()->result_type->equals(*predicate->result_type)) { /// No further optimization can be done - return nullptr; + return {}; } } auto actions = createActionsForConjunction(conjunction.allowed, all_inputs); if (!actions) - return nullptr; + return {}; /// Now, when actions are created, update the current DAG. removeUnusedConjunctions(std::move(conjunction.rejected), predicate, removes_filter); @@ -2555,11 +2559,11 @@ ActionsDAG::ActionsForJOINFilterPushDown ActionsDAG::splitActionsForJOINFilterPu auto left_stream_filter_to_push_down = createActionsForConjunction(left_stream_allowed_conjunctions, left_stream_header.getColumnsWithTypeAndName()); auto right_stream_filter_to_push_down = createActionsForConjunction(right_stream_allowed_conjunctions, right_stream_header.getColumnsWithTypeAndName()); - auto replace_equivalent_columns_in_filter = [](const ActionsDAGPtr & filter, + auto replace_equivalent_columns_in_filter = [](const ActionsDAG & filter, const Block & stream_header, const std::unordered_map & columns_to_replace) { - auto updated_filter = ActionsDAG::buildFilterActionsDAG({filter->getOutputs()[0]}, columns_to_replace); + auto updated_filter = ActionsDAG::buildFilterActionsDAG({filter.getOutputs()[0]}, columns_to_replace); chassert(updated_filter->getOutputs().size() == 1); /** If result filter to left or right stream has column that is one of the stream inputs, we need distinguish filter column from @@ -2580,7 +2584,7 @@ ActionsDAG::ActionsForJOINFilterPushDown ActionsDAG::splitActionsForJOINFilterPu for (const auto & input : updated_filter->getInputs()) updated_filter_inputs[input->result_name].push_back(input); - for (const auto & input : filter->getInputs()) + for (const auto & input : filter.getInputs()) { if (updated_filter_inputs.contains(input->result_name)) continue; @@ -2618,12 +2622,12 @@ ActionsDAG::ActionsForJOINFilterPushDown ActionsDAG::splitActionsForJOINFilterPu }; if (left_stream_filter_to_push_down) - left_stream_filter_to_push_down = replace_equivalent_columns_in_filter(left_stream_filter_to_push_down, + left_stream_filter_to_push_down = replace_equivalent_columns_in_filter(*left_stream_filter_to_push_down, left_stream_header, equivalent_right_stream_column_to_left_stream_column); if (right_stream_filter_to_push_down) - right_stream_filter_to_push_down = replace_equivalent_columns_in_filter(right_stream_filter_to_push_down, + right_stream_filter_to_push_down = replace_equivalent_columns_in_filter(*right_stream_filter_to_push_down, right_stream_header, equivalent_left_stream_column_to_right_stream_column); @@ -2852,13 +2856,13 @@ bool ActionsDAG::isSortingPreserved( return true; } -ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( +std::optional ActionsDAG::buildFilterActionsDAG( const NodeRawConstPtrs & filter_nodes, const std::unordered_map & node_name_to_input_node_column, bool single_output_condition_node) { if (filter_nodes.empty()) - return nullptr; + return {}; struct Frame { @@ -2866,7 +2870,7 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( bool visited_children = false; }; - auto result_dag = std::make_shared(); + ActionsDAG result_dag; std::unordered_map result_inputs; std::unordered_map node_to_result_node; @@ -2897,7 +2901,7 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( { auto & result_input = result_inputs[input_node_it->second.name]; if (!result_input) - result_input = &result_dag->addInput(input_node_it->second); + result_input = &result_dag.addInput(input_node_it->second); node_to_result_node.emplace(node, result_input); nodes_to_process.pop_back(); @@ -2924,25 +2928,25 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( { auto & result_input = result_inputs[node->result_name]; if (!result_input) - result_input = &result_dag->addInput({node->column, node->result_type, node->result_name}); + result_input = &result_dag.addInput({node->column, node->result_type, node->result_name}); result_node = result_input; break; } case ActionsDAG::ActionType::COLUMN: { - result_node = &result_dag->addColumn({node->column, node->result_type, node->result_name}); + result_node = &result_dag.addColumn({node->column, node->result_type, node->result_name}); break; } case ActionsDAG::ActionType::ALIAS: { const auto * child = node->children.front(); - result_node = &result_dag->addAlias(*(node_to_result_node.find(child)->second), node->result_name); + result_node = &result_dag.addAlias(*(node_to_result_node.find(child)->second), node->result_name); break; } case ActionsDAG::ActionType::ARRAY_JOIN: { const auto * child = node->children.front(); - result_node = &result_dag->addArrayJoin(*(node_to_result_node.find(child)->second), {}); + result_node = &result_dag.addArrayJoin(*(node_to_result_node.find(child)->second), {}); break; } case ActionsDAG::ActionType::FUNCTION: @@ -2960,13 +2964,11 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( { if (const auto * index_hint = typeid_cast(adaptor->getFunction().get())) { - ActionsDAGPtr index_hint_filter_dag; - const auto & index_hint_args = index_hint->getActions()->getOutputs(); + ActionsDAG index_hint_filter_dag; + const auto & index_hint_args = index_hint->getActions().getOutputs(); - if (index_hint_args.empty()) - index_hint_filter_dag = std::make_shared(); - else - index_hint_filter_dag = buildFilterActionsDAG(index_hint_args, + if (!index_hint_args.empty()) + index_hint_filter_dag = *buildFilterActionsDAG(index_hint_args, node_name_to_input_node_column, false /*single_output_condition_node*/); @@ -2988,7 +2990,7 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( auto [arguments, all_const] = getFunctionArguments(function_children); auto function_base = function_overload_resolver ? function_overload_resolver->build(arguments) : node->function_base; - result_node = &result_dag->addFunctionImpl( + result_node = &result_dag.addFunctionImpl( function_base, std::move(function_children), std::move(arguments), @@ -3003,7 +3005,7 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( nodes_to_process.pop_back(); } - auto & result_dag_outputs = result_dag->getOutputs(); + auto & result_dag_outputs = result_dag.getOutputs(); result_dag_outputs.reserve(filter_nodes_size); for (const auto & node : filter_nodes) @@ -3012,7 +3014,7 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( if (result_dag_outputs.size() > 1 && single_output_condition_node) { FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); - result_dag_outputs = { &result_dag->addFunction(func_builder_and, result_dag_outputs, {}) }; + result_dag_outputs = { &result_dag.addFunction(func_builder_and, result_dag_outputs, {}) }; } return result_dag; @@ -3108,10 +3110,9 @@ ActionsDAG::NodeRawConstPtrs ActionsDAG::filterNodesByAllowedInputs( return nodes; } -FindOriginalNodeForOutputName::FindOriginalNodeForOutputName(const ActionsDAGPtr & actions_) - :actions(actions_) +FindOriginalNodeForOutputName::FindOriginalNodeForOutputName(const ActionsDAG & actions_) { - const auto & actions_outputs = actions->getOutputs(); + const auto & actions_outputs = actions_.getOutputs(); for (const auto * output_node : actions_outputs) { /// find input node which refers to the output node @@ -3147,10 +3148,9 @@ const ActionsDAG::Node * FindOriginalNodeForOutputName::find(const String & outp return it->second; } -FindAliasForInputName::FindAliasForInputName(const ActionsDAGPtr & actions_) - :actions(actions_) +FindAliasForInputName::FindAliasForInputName(const ActionsDAG & actions_) { - const auto & actions_outputs = actions->getOutputs(); + const auto & actions_outputs = actions_.getOutputs(); for (const auto * output_node : actions_outputs) { /// find input node which corresponds to alias diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index c9974fd849c..43c1b41a240 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -11,9 +11,6 @@ namespace DB { -class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; - class IExecutableFunction; using ExecutableFunctionPtr = std::shared_ptr; @@ -247,7 +244,7 @@ public: /// c * d e /// \ / /// c * d - e - static ActionsDAGPtr foldActionsByProjection( + static ActionsDAG foldActionsByProjection( const std::unordered_map & new_inputs, const NodeRawConstPtrs & required_outputs); @@ -261,9 +258,10 @@ public: void compileExpressions(size_t min_count_to_compile_expression, const std::unordered_set & lazy_executed_nodes = {}); #endif - ActionsDAGPtr clone() const; + ActionsDAG clone(std::unordered_map & old_to_new_nodes) const; + ActionsDAG clone() const; - static ActionsDAGPtr cloneSubDAG(const NodeRawConstPtrs & outputs, bool remove_aliases); + static ActionsDAG cloneSubDAG(const NodeRawConstPtrs & outputs, bool remove_aliases); /// Execute actions for header. Input block must have empty columns. /// Result should be equal to the execution of ExpressionActions built from this DAG. @@ -301,7 +299,7 @@ public: /// @param ignore_constant_values - Do not check that constants are same. Use value from result_header. /// @param add_casted_columns - Create new columns with converted values instead of replacing original. /// @param new_names - Output parameter for new column names when add_casted_columns is used. - static ActionsDAGPtr makeConvertingActions( + static ActionsDAG makeConvertingActions( const ColumnsWithTypeAndName & source, const ColumnsWithTypeAndName & result, MatchColumnsMode mode, @@ -310,13 +308,13 @@ public: NameToNameMap * new_names = nullptr); /// Create expression which add const column and then materialize it. - static ActionsDAGPtr makeAddingColumnActions(ColumnWithTypeAndName column); + static ActionsDAG makeAddingColumnActions(ColumnWithTypeAndName column); /// Create ActionsDAG which represents expression equivalent to applying first and second actions consequently. /// Is used to replace `(first -> second)` expression chain to single `merge(first, second)` expression. /// If first.settings.project_input is set, then outputs of `first` must include inputs of `second`. /// Otherwise, any two actions may be combined. - static ActionsDAGPtr merge(ActionsDAG && first, ActionsDAG && second); + static ActionsDAG merge(ActionsDAG && first, ActionsDAG && second); /// The result is similar to merge(*this, second); /// Invariant : no nodes are removed from the first (this) DAG. @@ -327,12 +325,7 @@ public: /// *out_outputs is filled with pointers to the nodes corresponding to second.getOutputs(). void mergeNodes(ActionsDAG && second, NodeRawConstPtrs * out_outputs = nullptr); - struct SplitResult - { - ActionsDAGPtr first; - ActionsDAGPtr second; - std::unordered_map split_nodes_mapping; - }; + struct SplitResult; /// Split ActionsDAG into two DAGs, where first part contains all nodes from split_nodes and their children. /// Execution of first then second parts on block is equivalent to execution of initial DAG. @@ -360,7 +353,7 @@ public: * @param filter_name - name of filter node in current DAG. * @param input_stream_header - input stream header. */ - bool isFilterAlwaysFalseForDefaultValueInputs(const std::string & filter_name, const Block & input_stream_header); + bool isFilterAlwaysFalseForDefaultValueInputs(const std::string & filter_name, const Block & input_stream_header) const; /// Create actions which may calculate part of filter using only available_inputs. /// If nothing may be calculated, returns nullptr. @@ -379,19 +372,13 @@ public: /// columns will be transformed like `x, y, z` -> `z > 0, z, x, y` -(remove filter)-> `z, x, y`. /// To avoid it, add inputs from `all_inputs` list, /// so actions `x, y, z -> z > 0, x, y, z` -(remove filter)-> `x, y, z` will not change columns order. - ActionsDAGPtr splitActionsForFilterPushDown( + std::optional splitActionsForFilterPushDown( const std::string & filter_name, bool removes_filter, const Names & available_inputs, const ColumnsWithTypeAndName & all_inputs); - struct ActionsForJOINFilterPushDown - { - ActionsDAGPtr left_stream_filter_to_push_down; - bool left_stream_filter_removes_filter; - ActionsDAGPtr right_stream_filter_to_push_down; - bool right_stream_filter_removes_filter; - }; + struct ActionsForJOINFilterPushDown; /** Split actions for JOIN filter push down. * @@ -438,7 +425,7 @@ public: * * If single_output_condition_node = false, result dag has multiple output nodes. */ - static ActionsDAGPtr buildFilterActionsDAG( + static std::optional buildFilterActionsDAG( const NodeRawConstPtrs & filter_nodes, const std::unordered_map & node_name_to_input_node_column = {}, bool single_output_condition_node = true); @@ -470,21 +457,35 @@ private: void compileFunctions(size_t min_count_to_compile_expression, const std::unordered_set & lazy_executed_nodes = {}); #endif - static ActionsDAGPtr createActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs); + static std::optional createActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs); void removeUnusedConjunctions(NodeRawConstPtrs rejected_conjunctions, Node * predicate, bool removes_filter); }; +struct ActionsDAG::SplitResult +{ + ActionsDAG first; + ActionsDAG second; + std::unordered_map split_nodes_mapping; +}; + +struct ActionsDAG::ActionsForJOINFilterPushDown +{ + std::optional left_stream_filter_to_push_down; + bool left_stream_filter_removes_filter; + std::optional right_stream_filter_to_push_down; + bool right_stream_filter_removes_filter; +}; + class FindOriginalNodeForOutputName { using NameToNodeIndex = std::unordered_map; public: - explicit FindOriginalNodeForOutputName(const ActionsDAGPtr & actions); + explicit FindOriginalNodeForOutputName(const ActionsDAG & actions); const ActionsDAG::Node * find(const String & output_name); private: - ActionsDAGPtr actions; NameToNodeIndex index; }; @@ -493,11 +494,10 @@ class FindAliasForInputName using NameToNodeIndex = std::unordered_map; public: - explicit FindAliasForInputName(const ActionsDAGPtr & actions); + explicit FindAliasForInputName(const ActionsDAG & actions); const ActionsDAG::Node * find(const String & name); private: - ActionsDAGPtr actions; NameToNodeIndex index; }; diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index a874067e43c..e1b7e92ee5d 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1024,7 +1024,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & dag.project(args); auto index_hint = std::make_shared(); - index_hint->setActions(std::make_shared(std::move(dag))); + index_hint->setActions(std::move(dag)); // Arguments are removed. We add function instead of constant column to avoid constant folding. data.addFunction(std::make_unique(index_hint), {}, column_name); @@ -1287,7 +1287,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & lambda_dag.removeUnusedActions(Names(1, result_name)); auto lambda_actions = std::make_shared( - std::make_shared(std::move(lambda_dag)), + std::move(lambda_dag), ExpressionActionsSettings::fromContext(data.getContext(), CompileExpressions::yes)); DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type; diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 46d2d60e461..5b638fc14c8 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -18,12 +18,6 @@ namespace DB class ASTExpressionList; class ASTFunction; -class ExpressionActions; -using ExpressionActionsPtr = std::shared_ptr; - -class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; - class IFunctionOverloadResolver; using FunctionOverloadResolverPtr = std::shared_ptr; @@ -32,7 +26,7 @@ FutureSetPtr makeExplicitSet( const ASTFunction * node, const ActionsDAG & actions, ContextPtr context, PreparedSets & prepared_sets); /** For ActionsVisitor - * A stack of ExpressionActions corresponding to nested lambda expressions. + * A stack of ActionsDAG corresponding to nested lambda expressions. * The new action should be added to the highest possible level. * For example, in the expression "select arrayMap(x -> x + column1 * column2, array1)" * calculation of the product must be done outside the lambda expression (it does not depend on x), diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index e073b7a49b6..90bbde8be35 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -2997,7 +2997,11 @@ void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVari std::unique_ptr thread_pool; if (max_threads > 1 && total_input_rows > 100000) /// TODO Make a custom threshold. - thread_pool = std::make_unique(CurrentMetrics::AggregatorThreads, CurrentMetrics::AggregatorThreadsActive, CurrentMetrics::AggregatorThreadsScheduled, max_threads); + thread_pool = std::make_unique( + CurrentMetrics::AggregatorThreads, + CurrentMetrics::AggregatorThreadsActive, + CurrentMetrics::AggregatorThreadsScheduled, + max_threads); for (const auto & bucket_blocks : bucket_to_blocks) { @@ -3009,7 +3013,10 @@ void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVari result.aggregates_pools.push_back(std::make_shared()); Arena * aggregates_pool = result.aggregates_pools.back().get(); - auto task = [group = CurrentThread::getGroup(), bucket, &merge_bucket, aggregates_pool]{ merge_bucket(bucket, aggregates_pool, group); }; + /// if we don't use thread pool we don't need to attach and definitely don't want to detach from the thread group + /// because this thread is already attached + auto task = [group = thread_pool != nullptr ? CurrentThread::getGroup() : nullptr, bucket, &merge_bucket, aggregates_pool] + { merge_bucket(bucket, aggregates_pool, group); }; if (thread_pool) thread_pool->scheduleOrThrowOnError(task); diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index a3848fa3a75..a88c0de2cfe 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -1007,7 +1007,7 @@ void FileCache::freeSpaceRatioKeepingThreadFunc() limits_satisfied = main_priority->collectCandidatesForEviction( desired_size, desired_elements_num, keep_up_free_space_remove_batch, stat, eviction_candidates, lock); -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD /// Let's make sure that we correctly processed the limits. if (limits_satisfied && eviction_candidates.size() < keep_up_free_space_remove_batch) { @@ -1110,7 +1110,7 @@ void FileCache::removeAllReleasable(const UserID & user_id) { assertInitialized(); -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD assertCacheCorrectness(); #endif @@ -1226,7 +1226,7 @@ void FileCache::loadMetadataImpl() if (first_exception) std::rethrow_exception(first_exception); -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD assertCacheCorrectness(); #endif } @@ -1393,7 +1393,7 @@ void FileCache::loadMetadataForKeys(const fs::path & keys_dir) FileCache::~FileCache() { deactivateBackgroundOperations(); -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD assertCacheCorrectness(); #endif } diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 838ca0b491e..1664a91b694 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -67,7 +67,7 @@ FileSegment::FileSegment( , key_metadata(key_metadata_) , queue_iterator(queue_iterator_) , cache(cache_) -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD , log(getLogger(fmt::format("FileSegment({}) : {}", key_.toString(), range().toString()))) #else , log(getLogger("FileSegment")) @@ -385,9 +385,9 @@ void FileSegment::write(char * from, size_t size, size_t offset_in_file) try { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD /// This mutex is only needed to have a valid assertion in assertCacheCorrectness(), - /// which is only executed in debug/sanitizer builds (under ABORT_ON_LOGICAL_ERROR). + /// which is only executed in debug/sanitizer builds (under DEBUG_OR_SANITIZER_BUILD). std::lock_guard lock(write_mutex); #endif diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 1d23278a255..7e4b76d3cc6 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -963,7 +963,7 @@ KeyMetadata::iterator LockedKey::removeFileSegmentImpl( } else if (!can_be_broken) { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected path {} to exist", path); #else LOG_WARNING(key_metadata->logger(), "Expected path {} to exist, while removing {}:{}", diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 5d56ef09127..d04a73e384e 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -517,14 +516,11 @@ void executeQueryWithParallelReplicas( "`cluster_for_parallel_replicas` setting refers to cluster with several shards. Expected a cluster with one shard"); } - auto coordinator = std::make_shared( - new_cluster->getShardsInfo().begin()->getAllNodeCount(), settings.parallel_replicas_mark_segment_size); auto external_tables = new_context->getExternalTables(); auto read_from_remote = std::make_unique( query_ast, new_cluster, storage_id, - std::move(coordinator), header, processed_stage, new_context, diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index d832f568cb8..02323972102 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -49,18 +49,17 @@ namespace ErrorCodes static std::unordered_set processShortCircuitFunctions(const ActionsDAG & actions_dag, ShortCircuitFunctionEvaluation short_circuit_function_evaluation); -ExpressionActions::ExpressionActions(ActionsDAGPtr actions_dag_, const ExpressionActionsSettings & settings_, bool project_inputs_) - : project_inputs(project_inputs_) +ExpressionActions::ExpressionActions(ActionsDAG actions_dag_, const ExpressionActionsSettings & settings_, bool project_inputs_) + : actions_dag(std::move(actions_dag_)) + , project_inputs(project_inputs_) , settings(settings_) { - actions_dag = actions_dag_->clone(); - /// It's important to determine lazy executed nodes before compiling expressions. - std::unordered_set lazy_executed_nodes = processShortCircuitFunctions(*actions_dag, settings.short_circuit_function_evaluation); + std::unordered_set lazy_executed_nodes = processShortCircuitFunctions(actions_dag, settings.short_circuit_function_evaluation); #if USE_EMBEDDED_COMPILER if (settings.can_compile_expressions && settings.compile_expressions == CompileExpressions::yes) - actions_dag->compileExpressions(settings.min_count_to_compile_expression, lazy_executed_nodes); + actions_dag.compileExpressions(settings.min_count_to_compile_expression, lazy_executed_nodes); #endif linearizeActions(lazy_executed_nodes); @@ -68,12 +67,32 @@ ExpressionActions::ExpressionActions(ActionsDAGPtr actions_dag_, const Expressio if (settings.max_temporary_columns && num_columns > settings.max_temporary_columns) throw Exception(ErrorCodes::TOO_MANY_TEMPORARY_COLUMNS, "Too many temporary columns: {}. Maximum: {}", - actions_dag->dumpNames(), settings.max_temporary_columns); + actions_dag.dumpNames(), settings.max_temporary_columns); } ExpressionActionsPtr ExpressionActions::clone() const { - return std::make_shared(*this); + auto copy = std::make_shared(ExpressionActions()); + + std::unordered_map copy_map; + copy->actions_dag = actions_dag.clone(copy_map); + copy->actions = actions; + for (auto & action : copy->actions) + action.node = copy_map[action.node]; + + for (const auto * input : copy->actions_dag.getInputs()) + copy->input_positions.emplace(input->result_name, input_positions.at(input->result_name)); + + copy->num_columns = num_columns; + + copy->required_columns = required_columns; + copy->result_positions = result_positions; + copy->sample_block = sample_block; + + copy->project_inputs = project_inputs; + copy->settings = settings; + + return copy; } namespace @@ -341,8 +360,8 @@ void ExpressionActions::linearizeActions(const std::unordered_setgetOutputs(); - const auto & inputs = actions_dag->getInputs(); + const auto & outputs = actions_dag.getOutputs(); + const auto & inputs = actions_dag.getInputs(); auto reverse_info = getActionsDAGReverseInfo(nodes, outputs); std::vector data; diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index ddffe022215..6ff39ee07f7 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -70,7 +70,7 @@ public: using NameToInputMap = std::unordered_map>; private: - ActionsDAGPtr actions_dag; + ActionsDAG actions_dag; Actions actions; size_t num_columns = 0; @@ -84,14 +84,13 @@ private: ExpressionActionsSettings settings; public: - ExpressionActions() = delete; - explicit ExpressionActions(ActionsDAGPtr actions_dag_, const ExpressionActionsSettings & settings_ = {}, bool project_inputs_ = false); - ExpressionActions(const ExpressionActions &) = default; - ExpressionActions & operator=(const ExpressionActions &) = default; + explicit ExpressionActions(ActionsDAG actions_dag_, const ExpressionActionsSettings & settings_ = {}, bool project_inputs_ = false); + ExpressionActions(ExpressionActions &&) = default; + ExpressionActions & operator=(ExpressionActions &&) = default; const Actions & getActions() const { return actions; } - const std::list & getNodes() const { return actions_dag->getNodes(); } - const ActionsDAG & getActionsDAG() const { return *actions_dag; } + const std::list & getNodes() const { return actions_dag.getNodes(); } + const ActionsDAG & getActionsDAG() const { return actions_dag; } const ColumnNumbers & getResultPositions() const { return result_positions; } const ExpressionActionsSettings & getSettings() const { return settings; } @@ -131,6 +130,7 @@ public: ExpressionActionsPtr clone() const; private: + ExpressionActions() = default; void checkLimits(const ColumnsWithTypeAndName & columns) const; void linearizeActions(const std::unordered_set & lazy_executed_nodes); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 16d0eb71278..d25434a515d 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -658,7 +658,7 @@ void ExpressionAnalyzer::makeWindowDescriptionFromAST(const Context & context_, with_alias->getColumnName(), 1 /* direction */, 1 /* nulls_direction */)); - auto actions_dag = std::make_shared(aggregated_columns); + auto actions_dag = std::make_unique(aggregated_columns); getRootActions(column_ast, false, *actions_dag); desc.partition_by_actions.push_back(std::move(actions_dag)); } @@ -679,7 +679,7 @@ void ExpressionAnalyzer::makeWindowDescriptionFromAST(const Context & context_, order_by_element.direction, order_by_element.nulls_direction)); - auto actions_dag = std::make_shared(aggregated_columns); + auto actions_dag = std::make_unique(aggregated_columns); getRootActions(column_ast, false, *actions_dag); desc.order_by_actions.push_back(std::move(actions_dag)); } @@ -823,13 +823,14 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAG & actions) makeWindowDescriptionFromAST(*current_context, window_descriptions, desc, &definition); + auto full_sort_description = desc.full_sort_description; + auto [it, inserted] = window_descriptions.insert( - {default_window_name, desc}); + {default_window_name, std::move(desc)}); if (!inserted) { - assert(it->second.full_sort_description - == desc.full_sort_description); + assert(it->second.full_sort_description == full_sort_description); } it->second.window_functions.push_back(window_function); @@ -927,7 +928,7 @@ JoinPtr SelectQueryExpressionAnalyzer::appendJoin( { const ColumnsWithTypeAndName & left_sample_columns = chain.getLastStep().getResultColumns(); - ActionsDAGPtr converting_actions; + std::optional converting_actions; JoinPtr join = makeJoin(*syntax->ast_join, left_sample_columns, converting_actions); if (converting_actions) @@ -1039,7 +1040,7 @@ static std::unique_ptr buildJoinedPlan( /// Actions which need to be calculated on joined block. auto joined_block_actions = analyzed_join.createJoinedBlockActions(context); NamesWithAliases required_columns_with_aliases = analyzed_join.getRequiredColumns( - Block(joined_block_actions->getResultColumns()), joined_block_actions->getRequiredColumns().getNames()); + Block(joined_block_actions.getResultColumns()), joined_block_actions.getRequiredColumns().getNames()); Names original_right_column_names; for (auto & pr : required_columns_with_aliases) @@ -1060,17 +1061,17 @@ static std::unique_ptr buildJoinedPlan( interpreter->buildQueryPlan(*joined_plan); { Block original_right_columns = interpreter->getSampleBlock(); - auto rename_dag = std::make_unique(original_right_columns.getColumnsWithTypeAndName()); + ActionsDAG rename_dag(original_right_columns.getColumnsWithTypeAndName()); for (const auto & name_with_alias : required_columns_with_aliases) { if (name_with_alias.first != name_with_alias.second && original_right_columns.has(name_with_alias.first)) { auto pos = original_right_columns.getPositionByName(name_with_alias.first); - const auto & alias = rename_dag->addAlias(*rename_dag->getInputs()[pos], name_with_alias.second); - rename_dag->getOutputs()[pos] = &alias; + const auto & alias = rename_dag.addAlias(*rename_dag.getInputs()[pos], name_with_alias.second); + rename_dag.getOutputs()[pos] = &alias; } } - rename_dag->appendInputsForUnusedColumns(joined_plan->getCurrentDataStream().header); + rename_dag.appendInputsForUnusedColumns(joined_plan->getCurrentDataStream().header); auto rename_step = std::make_unique(joined_plan->getCurrentDataStream(), std::move(rename_dag)); rename_step->setStepDescription("Rename joined columns"); joined_plan->addStep(std::move(rename_step)); @@ -1130,14 +1131,14 @@ std::shared_ptr tryKeyValueJoin(std::shared_ptr a JoinPtr SelectQueryExpressionAnalyzer::makeJoin( const ASTTablesInSelectQueryElement & join_element, const ColumnsWithTypeAndName & left_columns, - ActionsDAGPtr & left_convert_actions) + std::optional & left_convert_actions) { /// Two JOINs are not supported with the same subquery, but different USINGs. if (joined_plan) throw Exception(ErrorCodes::LOGICAL_ERROR, "Table join was already created for query"); - ActionsDAGPtr right_convert_actions = nullptr; + std::optional right_convert_actions; const auto & analyzed_join = syntax->analyzed_join; @@ -1145,7 +1146,7 @@ JoinPtr SelectQueryExpressionAnalyzer::makeJoin( { auto joined_block_actions = analyzed_join->createJoinedBlockActions(getContext()); NamesWithAliases required_columns_with_aliases = analyzed_join->getRequiredColumns( - Block(joined_block_actions->getResultColumns()), joined_block_actions->getRequiredColumns().getNames()); + Block(joined_block_actions.getResultColumns()), joined_block_actions.getRequiredColumns().getNames()); Names original_right_column_names; for (auto & pr : required_columns_with_aliases) @@ -1162,7 +1163,7 @@ JoinPtr SelectQueryExpressionAnalyzer::makeJoin( std::tie(left_convert_actions, right_convert_actions) = analyzed_join->createConvertingActions(left_columns, right_columns); if (right_convert_actions) { - auto converting_step = std::make_unique(joined_plan->getCurrentDataStream(), right_convert_actions); + auto converting_step = std::make_unique(joined_plan->getCurrentDataStream(), std::move(*right_convert_actions)); converting_step->setStepDescription("Convert joined columns"); joined_plan->addStep(std::move(converting_step)); } @@ -1354,10 +1355,10 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain { for (auto & child : asts) { - auto actions_dag = std::make_shared(columns_after_join); - getRootActions(child, only_types, *actions_dag); + ActionsDAG actions_dag(columns_after_join); + getRootActions(child, only_types, actions_dag); group_by_elements_actions.emplace_back( - std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes))); + std::make_shared(std::move(actions_dag), ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes))); } } @@ -1471,7 +1472,7 @@ void SelectQueryExpressionAnalyzer::appendGroupByModifiers(ActionsDAG & before_a ExpressionActionsChain::Step & step = chain.addStep(before_aggregation.getNamesAndTypesList()); step.required_output = std::move(required_output); - step.actions()->dag = std::move(*ActionsDAG::makeConvertingActions(source_columns, result_columns, ActionsDAG::MatchColumnsMode::Position)); + step.actions()->dag = ActionsDAG::makeConvertingActions(source_columns, result_columns, ActionsDAG::MatchColumnsMode::Position); } void SelectQueryExpressionAnalyzer::appendSelectSkipWindowExpressions(ExpressionActionsChain::Step & step, ASTPtr const & node) @@ -1607,10 +1608,10 @@ ActionsAndProjectInputsFlagPtr SelectQueryExpressionAnalyzer::appendOrderBy(Expr { for (const auto & child : select_query->orderBy()->children) { - auto actions_dag = std::make_shared(columns_after_join); - getRootActions(child, only_types, *actions_dag); + ActionsDAG actions_dag(columns_after_join); + getRootActions(child, only_types, actions_dag); order_by_elements_actions.emplace_back( - std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes))); + std::make_shared(std::move(actions_dag), ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes))); } } @@ -1737,7 +1738,7 @@ void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const step.addRequiredOutput(expr->getColumnName()); } -ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool remove_unused_result) +ActionsDAG ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool remove_unused_result) { ActionsDAG actions_dag(aggregated_columns); NamesWithAliases result_columns; @@ -1789,7 +1790,7 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool remove_un actions_dag.removeUnusedActions(name_set); } - return std::make_unique(std::move(actions_dag)); + return actions_dag; } ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool remove_unused_result, CompileExpressions compile_expressions) @@ -1798,17 +1799,17 @@ ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool remov getActionsDAG(add_aliases, remove_unused_result), ExpressionActionsSettings::fromContext(getContext(), compile_expressions), add_aliases && remove_unused_result); } -ActionsDAGPtr ExpressionAnalyzer::getConstActionsDAG(const ColumnsWithTypeAndName & constant_inputs) +ActionsDAG ExpressionAnalyzer::getConstActionsDAG(const ColumnsWithTypeAndName & constant_inputs) { - auto actions = std::make_shared(constant_inputs); - getRootActions(query, true /* no_makeset_for_subqueries */, *actions, true /* only_consts */); + ActionsDAG actions(constant_inputs); + getRootActions(query, true /* no_makeset_for_subqueries */, actions, true /* only_consts */); return actions; } ExpressionActionsPtr ExpressionAnalyzer::getConstActions(const ColumnsWithTypeAndName & constant_inputs) { auto actions = getConstActionsDAG(constant_inputs); - return std::make_shared(actions, ExpressionActionsSettings::fromContext(getContext())); + return std::make_shared(std::move(actions), ExpressionActionsSettings::fromContext(getContext())); } std::unique_ptr SelectQueryExpressionAnalyzer::getJoinedPlan() @@ -1879,8 +1880,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (prewhere_dag_and_flags) { - auto dag = std::make_shared(std::move(prewhere_dag_and_flags->dag)); - prewhere_info = std::make_shared(std::move(dag), query.prewhere()->getColumnName()); + prewhere_info = std::make_shared(std::move(prewhere_dag_and_flags->dag), query.prewhere()->getColumnName()); prewhere_dag_and_flags.reset(); } @@ -1923,7 +1923,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (storage && additional_filter) { - Names columns_for_additional_filter = additional_filter->actions->getRequiredColumnsNames(); + Names columns_for_additional_filter = additional_filter->actions.getRequiredColumnsNames(); additional_required_columns_after_prewhere.insert(additional_required_columns_after_prewhere.end(), columns_for_additional_filter.begin(), columns_for_additional_filter.end()); } @@ -1944,10 +1944,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( Block before_prewhere_sample = source_header; if (sanitizeBlock(before_prewhere_sample)) { - auto dag = prewhere_dag_and_flags->dag.clone(); - ExpressionActions( - dag, - ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_prewhere_sample); + before_prewhere_sample = prewhere_dag_and_flags->dag.updateHeader(before_prewhere_sample); auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName()); /// If the filter column is a constant, record it. if (column_elem.column) @@ -1979,9 +1976,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( before_where_sample = source_header; if (sanitizeBlock(before_where_sample)) { - ExpressionActions( - before_where->dag.clone(), - ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); + before_where_sample = before_where->dag.updateHeader(before_where_sample); auto & column_elem = before_where_sample.getByName(query.where()->getColumnName()); @@ -2054,7 +2049,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( auto & step = chain.lastStep(query_analyzer.aggregated_columns); auto & actions = step.actions()->dag; - actions = std::move(*ActionsDAG::merge(std::move(actions), std::move(*converting))); + actions = ActionsDAG::merge(std::move(actions), std::move(converting)); } } @@ -2235,12 +2230,11 @@ void ExpressionAnalysisResult::checkActions() const /// Check that PREWHERE doesn't contain unusual actions. Unusual actions are that can change number of rows. if (hasPrewhere()) { - auto check_actions = [](const ActionsDAGPtr & actions) + auto check_actions = [](ActionsDAG & actions) { - if (actions) - for (const auto & node : actions->getNodes()) - if (node.type == ActionsDAG::ActionType::ARRAY_JOIN) - throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "PREWHERE cannot contain ARRAY JOIN action"); + for (const auto & node : actions.getNodes()) + if (node.type == ActionsDAG::ActionType::ARRAY_JOIN) + throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "PREWHERE cannot contain ARRAY JOIN action"); }; check_actions(prewhere_info->prewhere_actions); diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 12d6dce8f72..0c00247df85 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -38,9 +38,6 @@ using StorageMetadataPtr = std::shared_ptr; class ArrayJoinAction; using ArrayJoinActionPtr = std::shared_ptr; -class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; - /// Create columns in block or return false if not possible bool sanitizeBlock(Block & block, bool throw_if_cannot_create_column = false); @@ -117,12 +114,12 @@ public: /// If add_aliases, only the calculated values in the desired order and add aliases. /// If also remove_unused_result, than only aliases remain in the output block. /// Otherwise, only temporary columns will be deleted from the block. - ActionsDAGPtr getActionsDAG(bool add_aliases, bool remove_unused_result = true); + ActionsDAG getActionsDAG(bool add_aliases, bool remove_unused_result = true); ExpressionActionsPtr getActions(bool add_aliases, bool remove_unused_result = true, CompileExpressions compile_expressions = CompileExpressions::no); /// Get actions to evaluate a constant expression. The function adds constants and applies functions that depend only on constants. /// Does not execute subqueries. - ActionsDAGPtr getConstActionsDAG(const ColumnsWithTypeAndName & constant_inputs = {}); + ActionsDAG getConstActionsDAG(const ColumnsWithTypeAndName & constant_inputs = {}); ExpressionActionsPtr getConstActions(const ColumnsWithTypeAndName & constant_inputs = {}); /** Sets that require a subquery to be create. @@ -367,7 +364,7 @@ private: JoinPtr makeJoin( const ASTTablesInSelectQueryElement & join_element, const ColumnsWithTypeAndName & left_columns, - ActionsDAGPtr & left_convert_actions); + std::optional & left_convert_actions); const ASTSelectQuery * getAggregatingQuery() const; diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 986134a6998..b1745ea8956 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -296,7 +296,7 @@ private: { auto joined_block_actions = data.table_join->createJoinedBlockActions(data.getContext()); NamesWithAliases required_columns_with_aliases = data.table_join->getRequiredColumns( - Block(joined_block_actions->getResultColumns()), joined_block_actions->getRequiredColumns().getNames()); + Block(joined_block_actions.getResultColumns()), joined_block_actions.getRequiredColumns().getNames()); for (auto & pr : required_columns_with_aliases) required_columns.push_back(pr.first); diff --git a/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp b/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp index d8f6df05ca4..0034b50f02c 100644 --- a/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp +++ b/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp @@ -17,13 +17,19 @@ namespace DB { -IInterpreterUnionOrSelectQuery::IInterpreterUnionOrSelectQuery(const DB::ASTPtr& query_ptr_, - const DB::ContextMutablePtr& context_, const DB::SelectQueryOptions& options_) - : query_ptr(query_ptr_) - , context(context_) - , options(options_) - , max_streams(context->getSettingsRef().max_threads) +IInterpreterUnionOrSelectQuery::IInterpreterUnionOrSelectQuery(const ASTPtr & query_ptr_, + const ContextMutablePtr & context_, const SelectQueryOptions & options_) + : query_ptr(query_ptr_) + , context(context_) + , options(options_) + , max_streams(context->getSettingsRef().max_threads) { + /// FIXME All code here will work with the old analyzer, however for views over Distributed tables + /// it's possible that new analyzer will be enabled in ::getQueryProcessingStage method + /// of the underlying storage when all other parts of infrastructure are not ready for it + /// (built with old analyzer). + context->setSetting("allow_experimental_analyzer", false); + if (options.shard_num) context->addSpecialScalar( "_shard_num", @@ -118,16 +124,16 @@ static ASTPtr parseAdditionalPostFilter(const Context & context) "additional filter", settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); } -static ActionsDAGPtr makeAdditionalPostFilter(ASTPtr & ast, ContextPtr context, const Block & header) +static ActionsDAG makeAdditionalPostFilter(ASTPtr & ast, ContextPtr context, const Block & header) { auto syntax_result = TreeRewriter(context).analyze(ast, header.getNamesAndTypesList()); String result_column_name = ast->getColumnName(); auto dag = ExpressionAnalyzer(ast, syntax_result, context).getActionsDAG(false, false); - const ActionsDAG::Node * result_node = &dag->findInOutputs(result_column_name); - auto & outputs = dag->getOutputs(); + const ActionsDAG::Node * result_node = &dag.findInOutputs(result_column_name); + auto & outputs = dag.getOutputs(); outputs.clear(); - outputs.reserve(dag->getInputs().size() + 1); - for (const auto * node : dag->getInputs()) + outputs.reserve(dag.getInputs().size() + 1); + for (const auto * node : dag.getInputs()) outputs.push_back(node); outputs.push_back(result_node); @@ -145,7 +151,7 @@ void IInterpreterUnionOrSelectQuery::addAdditionalPostFilter(QueryPlan & plan) c return; auto dag = makeAdditionalPostFilter(ast, context, plan.getCurrentDataStream().header); - std::string filter_name = dag->getOutputs().back()->result_name; + std::string filter_name = dag.getOutputs().back()->result_name; auto filter_step = std::make_unique( plan.getCurrentDataStream(), std::move(dag), std::move(filter_name), true); filter_step->setStepDescription("Additional result filter"); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index ea10ad59db4..b8646e1a971 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include @@ -82,13 +81,13 @@ #include #include -#include #include #include #include #include + namespace CurrentMetrics { extern const Metric AttachedTable; @@ -147,27 +146,27 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) } auto db_num_limit = getContext()->getGlobalContext()->getServerSettings().max_database_num_to_throw; - if (db_num_limit > 0) + if (db_num_limit > 0 && !internal) { size_t db_count = DatabaseCatalog::instance().getDatabases().size(); - std::vector system_databases = { + std::initializer_list system_databases = + { DatabaseCatalog::TEMPORARY_DATABASE, DatabaseCatalog::SYSTEM_DATABASE, DatabaseCatalog::INFORMATION_SCHEMA, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE, - DatabaseCatalog::DEFAULT_DATABASE }; for (const auto & system_database : system_databases) { - if (db_count > 0 && DatabaseCatalog::instance().isDatabaseExist(system_database)) - db_count--; + if (db_count > 0 && DatabaseCatalog::instance().isDatabaseExist(std::string(system_database))) + --db_count; } if (db_count >= db_num_limit) throw Exception(ErrorCodes::TOO_MANY_DATABASES, - "Too many databases in the Clickhouse. " - "The limit (setting 'max_database_num_to_throw') is set to {}, current number of databases is {}", + "Too many databases. " + "The limit (server configuration parameter `max_database_num_to_throw`) is set to {}, the current number of databases is {}", db_num_limit, db_count); } @@ -950,7 +949,7 @@ namespace throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated, Shared or KeeperMap table engines"); } - void setDefaultTableEngine(ASTStorage &storage, DefaultTableEngine engine) + void setDefaultTableEngine(ASTStorage & storage, DefaultTableEngine engine) { if (engine == DefaultTableEngine::None) throw Exception(ErrorCodes::ENGINE_REQUIRED, "Table engine is not specified in CREATE query"); @@ -960,19 +959,44 @@ namespace engine_ast->no_empty_args = true; storage.set(storage.engine, engine_ast); } + + void setNullTableEngine(ASTStorage & storage) + { + auto engine_ast = std::make_shared(); + engine_ast->name = "Null"; + engine_ast->no_empty_args = true; + storage.set(storage.engine, engine_ast); + } + } void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const { if (create.as_table_function) + { + if (getContext()->getSettingsRef().restore_replace_external_table_functions_to_null) + { + const auto & factory = TableFunctionFactory::instance(); + + auto properties = factory.tryGetProperties(create.as_table_function->as()->name); + if (properties && properties->allow_readonly) + return; + if (!create.storage) + { + auto storage_ast = std::make_shared(); + create.set(create.storage, storage_ast); + } + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Storage should not be created yet, it's a bug."); + create.as_table_function = nullptr; + setNullTableEngine(*create.storage); + } return; + } if (create.is_dictionary || create.is_ordinary_view || create.is_live_view || create.is_window_view) return; - if (create.is_materialized_view && create.to_table_id) - return; - if (create.temporary) { /// Some part of storage definition is specified, but ENGINE is not: just set the one from default_temporary_table_engine setting. @@ -987,22 +1011,51 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const } if (!create.storage->engine) - { setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_temporary_table_engine.value); - } checkTemporaryTableEngineName(create.storage->engine->name); return; } + if (create.is_materialized_view) + { + /// A materialized view with an external target doesn't need a table engine. + if (create.is_materialized_view_with_external_target()) + return; + + if (auto to_engine = create.getTargetInnerEngine(ViewTarget::To)) + { + /// This materialized view already has a storage definition. + if (!to_engine->engine) + { + /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one. + setDefaultTableEngine(*to_engine, getContext()->getSettingsRef().default_table_engine.value); + } + return; + } + } + if (create.storage) { - /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one. + /// This table already has a storage definition. if (!create.storage->engine) + { + /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one. setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); + } + /// For external tables with restore_replace_external_engine_to_null setting we replace external engines to + /// Null table engine. + else if (getContext()->getSettingsRef().restore_replace_external_engines_to_null) + { + if (StorageFactory::instance().getStorageFeatures(create.storage->engine->name).source_access_type != AccessType::NONE) + setNullTableEngine(*create.storage); + } return; } + /// We'll try to extract a storage definition from clause `AS`: + /// CREATE TABLE table_name AS other_table_name + std::shared_ptr storage_def; if (!create.as_table.empty()) { /// NOTE Getting the structure from the table specified in the AS is done not atomically with the creation of the table. @@ -1018,12 +1071,14 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (as_create.is_ordinary_view) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a View", qualified_name); - if (as_create.is_materialized_view && as_create.to_table_id) + if (as_create.is_materialized_view_with_external_target()) + { throw Exception( ErrorCodes::INCORRECT_QUERY, - "Cannot CREATE a table AS {}, it is a Materialized View without storage. Use \"AS `{}`\" instead", + "Cannot CREATE a table AS {}, it is a Materialized View without storage. Use \"AS {}\" instead", qualified_name, - as_create.to_table_id.getQualifiedName()); + as_create.getTargetTableID(ViewTarget::To).getFullTableName()); + } if (as_create.is_live_view) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Live View", qualified_name); @@ -1034,18 +1089,37 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (as_create.is_dictionary) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Dictionary", qualified_name); - if (as_create.storage) - create.set(create.storage, as_create.storage->ptr()); + if (as_create.is_materialized_view) + { + storage_def = as_create.getTargetInnerEngine(ViewTarget::To); + } else if (as_create.as_table_function) + { create.set(create.as_table_function, as_create.as_table_function->ptr()); + return; + } + else if (as_create.storage) + { + storage_def = typeid_cast>(as_create.storage->ptr()); + } else + { throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot set engine, it's a bug."); - - return; + } } - create.set(create.storage, std::make_shared()); - setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); + if (!storage_def) + { + /// Set ENGINE by default. + storage_def = std::make_shared(); + setDefaultTableEngine(*storage_def, getContext()->getSettingsRef().default_table_engine.value); + } + + /// Use the found table engine to modify the create query. + if (create.is_materialized_view) + create.setTargetInnerEngine(ViewTarget::To, storage_def); + else + create.set(create.storage, storage_def); } void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const DatabasePtr & database) const @@ -1087,11 +1161,11 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data kind_upper, create.table); } - create.generateRandomUUID(); + create.generateRandomUUIDs(); } else { - bool has_uuid = create.uuid != UUIDHelpers::Nil || create.to_inner_uuid != UUIDHelpers::Nil; + bool has_uuid = (create.uuid != UUIDHelpers::Nil) || create.hasInnerUUIDs(); if (has_uuid && !is_on_cluster && !internal) { /// We don't show the following error message either @@ -1106,8 +1180,7 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data /// The database doesn't support UUID so we'll ignore it. The UUID could be set here because of either /// a) the initiator of `ON CLUSTER` query generated it to ensure the same UUIDs are used on different hosts; or /// b) `RESTORE from backup` query generated it to ensure the same UUIDs are used on different hosts. - create.uuid = UUIDHelpers::Nil; - create.to_inner_uuid = UUIDHelpers::Nil; + create.resetUUIDs(); } } @@ -1131,6 +1204,14 @@ void checkTableCanBeAddedWithNoCyclicDependencies(const ASTCreateQuery & create, DatabaseCatalog::instance().checkTableCanBeAddedWithNoCyclicDependencies(qualified_name, ref_dependencies, loading_dependencies); } +bool isReplicated(const ASTStorage & storage) +{ + if (!storage.engine) + return false; + const auto & storage_name = storage.engine->name; + return storage_name.starts_with("Replicated") || storage_name.starts_with("Shared"); +} + } BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) @@ -1247,8 +1328,9 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (!create.temporary && !create.database) create.setDatabase(current_database); - if (create.to_table_id && create.to_table_id.database_name.empty()) - create.to_table_id.database_name = current_database; + + if (create.targets) + create.targets->setCurrentDatabase(current_database); if (create.select && create.isView()) { @@ -1282,12 +1364,9 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) TableProperties properties = getTablePropertiesAndNormalizeCreateQuery(create, mode); /// Check type compatible for materialized dest table and select columns - if (create.select && create.is_materialized_view && create.to_table_id && mode <= LoadingStrictnessLevel::CREATE) + if (create.is_materialized_view_with_external_target() && create.select && mode <= LoadingStrictnessLevel::CREATE) { - if (StoragePtr to_table = DatabaseCatalog::instance().tryGetTable( - {create.to_table_id.database_name, create.to_table_id.table_name, create.to_table_id.uuid}, - getContext() - )) + if (StoragePtr to_table = DatabaseCatalog::instance().tryGetTable(create.getTargetTableID(ViewTarget::To), getContext())) { Block input_block; @@ -1329,23 +1408,37 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (need_add_to_database) database = DatabaseCatalog::instance().tryGetDatabase(database_name); - bool allow_heavy_create = getContext()->getSettingsRef().database_replicated_allow_heavy_create; - if (!allow_heavy_create && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) + bool allow_heavy_populate = getContext()->getSettingsRef().database_replicated_allow_heavy_create && create.is_populate; + if (!allow_heavy_populate && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) { bool is_storage_replicated = false; - if (create.storage && create.storage->engine) + + if (create.storage && isReplicated(*create.storage)) + is_storage_replicated = true; + + if (create.targets) { - const auto & storage_name = create.storage->engine->name; - if (storage_name.starts_with("Replicated") || storage_name.starts_with("Shared")) - is_storage_replicated = true; + for (const auto & inner_table_engine : create.targets->getInnerEngines()) + { + if (isReplicated(*inner_table_engine)) + is_storage_replicated = true; + } } const bool allow_create_select_for_replicated = (create.isView() && !create.is_populate) || create.is_create_empty || !is_storage_replicated; if (!allow_create_select_for_replicated) + { + /// POPULATE can be enabled with setting, provide hint in error message + if (create.is_populate) + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "CREATE with POPULATE is not supported with Replicated databases. Consider using separate CREATE and INSERT queries. " + "Alternatively, you can enable 'database_replicated_allow_heavy_create' setting to allow this operation, use with caution"); + throw Exception( ErrorCodes::SUPPORT_IS_DISABLED, - "CREATE AS SELECT and POPULATE is not supported with Replicated databases. Consider using separate CREATE and INSERT queries. " - "Alternatively, you can enable 'database_replicated_allow_heavy_create' setting to allow this operation, use with caution"); + "CREATE AS SELECT is not supported with Replicated databases. Consider using separate CREATE and INSERT queries."); + } } if (database && database->shouldReplicateQuery(getContext(), query_ptr)) @@ -1601,13 +1694,13 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, } UInt64 table_num_limit = getContext()->getGlobalContext()->getServerSettings().max_table_num_to_throw; - if (table_num_limit > 0 && create.getDatabase() != DatabaseCatalog::SYSTEM_DATABASE) + if (table_num_limit > 0 && !internal) { UInt64 table_count = CurrentMetrics::get(CurrentMetrics::AttachedTable); if (table_count >= table_num_limit) throw Exception(ErrorCodes::TOO_MANY_TABLES, - "Too many tables in the Clickhouse. " - "The limit (setting 'max_table_num_to_throw') is set to {}, current number of tables is {}", + "Too many tables. " + "The limit (server configuration parameter `max_table_num_to_throw`) is set to {}, the current number of tables is {}", table_num_limit, table_count); } @@ -1796,7 +1889,7 @@ void InterpreterCreateQuery::prepareOnClusterQuery(ASTCreateQuery & create, Cont /// For CREATE query generate UUID on initiator, so it will be the same on all hosts. /// It will be ignored if database does not support UUIDs. - create.generateRandomUUID(); + create.generateRandomUUIDs(); /// For cross-replication cluster we cannot use UUID in replica path. String cluster_name_expanded = local_context->getMacros()->expand(cluster_name); @@ -1918,8 +2011,15 @@ AccessRightsElements InterpreterCreateQuery::getRequiredAccess() const } } - if (create.to_table_id) - required_access.emplace_back(AccessType::SELECT | AccessType::INSERT, create.to_table_id.database_name, create.to_table_id.table_name); + if (create.targets) + { + for (const auto & target : create.targets->targets) + { + const auto & target_id = target.table_id; + if (target_id) + required_access.emplace_back(AccessType::SELECT | AccessType::INSERT, target_id.database_name, target_id.table_name); + } + } if (create.storage && create.storage->engine) required_access.emplace_back(AccessType::TABLE_ENGINE, create.storage->engine->name); diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index b68b3ddcd48..bad3e5277db 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -399,10 +399,9 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, if (query.if_empty) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DROP IF EMPTY is not implemented for databases"); - if (database->hasReplicationThread()) + if (!truncate && database->hasReplicationThread()) database->stopReplication(); - if (database->shouldBeEmptyOnDetach()) { /// Cancel restarting replicas in that database, wait for remaining RESTART queries to finish. diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 3906cef1995..48013d3ab00 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -390,7 +390,7 @@ Chain InterpreterInsertQuery::buildPreSinkChain( context_ptr, null_as_default); - auto adding_missing_defaults_actions = std::make_shared(adding_missing_defaults_dag); + auto adding_missing_defaults_actions = std::make_shared(std::move(adding_missing_defaults_dag)); /// Actually we don't know structure of input blocks from query/table, /// because some clients break insertion protocol (columns != header) @@ -536,7 +536,7 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & pipeline.getHeader().getColumnsWithTypeAndName(), query_sample_block.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Position); - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); + auto actions = std::make_shared(std::move(actions_dag), ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index cd91f9532b9..41306a79198 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -75,7 +75,6 @@ #include #include -#include #include #include #include @@ -178,15 +177,15 @@ FilterDAGInfoPtr generateFilterActions( /// Using separate expression analyzer to prevent any possible alias injection auto syntax_result = TreeRewriter(context).analyzeSelect(query_ast, TreeRewriterResult({}, storage, storage_snapshot)); SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, context, metadata_snapshot, {}, false, {}, prepared_sets); - filter_info->actions = std::make_unique(std::move(analyzer.simpleSelectActions()->dag)); + filter_info->actions = std::move(analyzer.simpleSelectActions()->dag); filter_info->column_name = expr_list->children.at(0)->getColumnName(); - filter_info->actions->removeUnusedActions(NameSet{filter_info->column_name}); + filter_info->actions.removeUnusedActions(NameSet{filter_info->column_name}); - for (const auto * node : filter_info->actions->getInputs()) - filter_info->actions->getOutputs().push_back(node); + for (const auto * node : filter_info->actions.getInputs()) + filter_info->actions.getOutputs().push_back(node); - auto required_columns_from_filter = filter_info->actions->getRequiredColumns(); + auto required_columns_from_filter = filter_info->actions.getRequiredColumns(); for (const auto & column : required_columns_from_filter) { @@ -214,11 +213,11 @@ InterpreterSelectQuery::InterpreterSelectQuery( {} InterpreterSelectQuery::InterpreterSelectQuery( - const ASTPtr & query_ptr_, - const ContextPtr & context_, - Pipe input_pipe_, - const SelectQueryOptions & options_) - : InterpreterSelectQuery(query_ptr_, context_, std::move(input_pipe_), nullptr, options_.copy().noSubquery()) + const ASTPtr & query_ptr_, + const ContextPtr & context_, + Pipe input_pipe_, + const SelectQueryOptions & options_) + : InterpreterSelectQuery(query_ptr_, context_, std::move(input_pipe_), nullptr, options_.copy().noSubquery()) {} InterpreterSelectQuery::InterpreterSelectQuery( @@ -227,18 +226,15 @@ InterpreterSelectQuery::InterpreterSelectQuery( const StoragePtr & storage_, const StorageMetadataPtr & metadata_snapshot_, const SelectQueryOptions & options_) - : InterpreterSelectQuery( - query_ptr_, context_, std::nullopt, storage_, options_.copy().noSubquery(), {}, metadata_snapshot_) -{ -} + : InterpreterSelectQuery(query_ptr_, context_, std::nullopt, storage_, options_.copy().noSubquery(), {}, metadata_snapshot_) +{} InterpreterSelectQuery::InterpreterSelectQuery( const ASTPtr & query_ptr_, const ContextPtr & context_, const SelectQueryOptions & options_, PreparedSetsPtr prepared_sets_) - : InterpreterSelectQuery( - query_ptr_, context_, std::nullopt, nullptr, options_, {}, {}, prepared_sets_) + : InterpreterSelectQuery(query_ptr_, context_, std::nullopt, nullptr, options_, {}, {}, prepared_sets_) {} InterpreterSelectQuery::~InterpreterSelectQuery() = default; @@ -941,7 +937,7 @@ bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis() { { const auto & node - = query_info_copy.prewhere_info->prewhere_actions->findInOutputs(query_info_copy.prewhere_info->prewhere_column_name); + = query_info_copy.prewhere_info->prewhere_actions.findInOutputs(query_info_copy.prewhere_info->prewhere_column_name); added_filter_nodes.nodes.push_back(&node); } @@ -953,7 +949,8 @@ bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis() } } - query_info_copy.filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + if (auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes)) + query_info_copy.filter_actions_dag = std::make_shared(std::move(*filter_actions_dag)); UInt64 rows_to_read = storage_merge_tree->estimateNumberOfRowsToRead(context, storage_snapshot, query_info_copy); /// Note that we treat an estimation of 0 rows as a real estimation size_t number_of_replicas_to_use = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica; @@ -988,7 +985,7 @@ void InterpreterSelectQuery::buildQueryPlan(QueryPlan & query_plan) ActionsDAG::MatchColumnsMode::Name, true); - auto converting = std::make_unique(query_plan.getCurrentDataStream(), convert_actions_dag); + auto converting = std::make_unique(query_plan.getCurrentDataStream(), std::move(convert_actions_dag)); query_plan.addStep(std::move(converting)); } @@ -1061,7 +1058,7 @@ Block InterpreterSelectQuery::getSampleBlockImpl() if (analysis_result.prewhere_info) { - header = analysis_result.prewhere_info->prewhere_actions->updateHeader(header); + header = analysis_result.prewhere_info->prewhere_actions.updateHeader(header); if (analysis_result.prewhere_info->remove_prewhere_column) header.erase(analysis_result.prewhere_info->prewhere_column_name); } @@ -1312,12 +1309,12 @@ static InterpolateDescriptionPtr getInterpolateDescription( auto syntax_result = TreeRewriter(context).analyze(exprs, source_columns); ExpressionAnalyzer analyzer(exprs, syntax_result, context); - ActionsDAGPtr actions = analyzer.getActionsDAG(true); - ActionsDAGPtr conv_dag = ActionsDAG::makeConvertingActions(actions->getResultColumns(), + ActionsDAG actions = analyzer.getActionsDAG(true); + ActionsDAG conv_dag = ActionsDAG::makeConvertingActions(actions.getResultColumns(), result_columns, ActionsDAG::MatchColumnsMode::Position, true); - ActionsDAGPtr merge_dag = ActionsDAG::merge(std::move(*actions->clone()), std::move(*conv_dag)); + ActionsDAG merge_dag = ActionsDAG::merge(std::move(actions), std::move(conv_dag)); - interpolate_descr = std::make_shared(merge_dag, aliases); + interpolate_descr = std::make_shared(std::move(merge_dag), aliases); } return interpolate_descr; @@ -1500,7 +1497,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( query_plan.getCurrentDataStream(), - expressions.filter_info->actions, + expressions.filter_info->actions.clone(), expressions.filter_info->column_name, expressions.filter_info->do_remove_column); @@ -1514,7 +1511,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( query_plan.getCurrentDataStream(), - expressions.prewhere_info->row_level_filter, + expressions.prewhere_info->row_level_filter->clone(), expressions.prewhere_info->row_level_column_name, true); @@ -1524,7 +1521,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( query_plan.getCurrentDataStream(), - expressions.prewhere_info->prewhere_actions, + expressions.prewhere_info->prewhere_actions.clone(), expressions.prewhere_info->prewhere_column_name, expressions.prewhere_info->remove_prewhere_column); @@ -1626,7 +1623,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( query_plan.getCurrentDataStream(), - expressions.filter_info->actions, + expressions.filter_info->actions.clone(), expressions.filter_info->column_name, expressions.filter_info->do_remove_column); @@ -1634,11 +1631,11 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( query_plan.getCurrentDataStream(), - new_filter_info->actions, + std::move(new_filter_info->actions), new_filter_info->column_name, new_filter_info->do_remove_column); @@ -1726,7 +1723,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

getCurrentDataStream().header, join_clause.key_names_right); - if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering && has_non_const_keys) + if (settings.max_rows_in_set_to_optimize_join > 0 && join_type_allows_filtering && has_non_const_keys) { auto * left_set = add_create_set(query_plan, join_clause.key_names_left, JoinTableSide::Left); auto * right_set = add_create_set(*joined_plan, join_clause.key_names_right, JoinTableSide::Right); @@ -2057,18 +2057,20 @@ void InterpreterSelectQuery::addEmptySourceToQueryPlan(QueryPlan & query_plan, c if (prewhere_info.row_level_filter) { + auto row_level_actions = std::make_shared(prewhere_info.row_level_filter->clone()); pipe.addSimpleTransform([&](const Block & header) { return std::make_shared(header, - std::make_shared(prewhere_info.row_level_filter), + row_level_actions, prewhere_info.row_level_column_name, true); }); } + auto filter_actions = std::make_shared(prewhere_info.prewhere_actions.clone()); pipe.addSimpleTransform([&](const Block & header) { return std::make_shared( - header, std::make_shared(prewhere_info.prewhere_actions), + header, filter_actions, prewhere_info.prewhere_column_name, prewhere_info.remove_prewhere_column); }); } @@ -2112,8 +2114,8 @@ void InterpreterSelectQuery::applyFiltersToPrewhereInAnalysis(ExpressionAnalysis if (does_storage_support_prewhere && shouldMoveToPrewhere()) { /// Execute row level filter in prewhere as a part of "move to prewhere" optimization. - analysis.prewhere_info = std::make_shared(analysis.filter_info->actions, analysis.filter_info->column_name); - analysis.prewhere_info->remove_prewhere_column = analysis.filter_info->do_remove_column; + analysis.prewhere_info = std::make_shared(std::move(analysis.filter_info->actions), analysis.filter_info->column_name); + analysis.prewhere_info->remove_prewhere_column = std::move(analysis.filter_info->do_remove_column); analysis.prewhere_info->need_filter = true; analysis.filter_info = nullptr; } @@ -2121,8 +2123,8 @@ void InterpreterSelectQuery::applyFiltersToPrewhereInAnalysis(ExpressionAnalysis else { /// Add row level security actions to prewhere. - analysis.prewhere_info->row_level_filter = analysis.filter_info->actions; - analysis.prewhere_info->row_level_column_name = analysis.filter_info->column_name; + analysis.prewhere_info->row_level_filter = std::move(analysis.filter_info->actions); + analysis.prewhere_info->row_level_column_name = std::move(analysis.filter_info->column_name); analysis.filter_info = nullptr; } } @@ -2155,7 +2157,7 @@ void InterpreterSelectQuery::addPrewhereAliasActions() if (prewhere_info) { /// Get some columns directly from PREWHERE expression actions - auto prewhere_required_columns = prewhere_info->prewhere_actions->getRequiredColumns().getNames(); + auto prewhere_required_columns = prewhere_info->prewhere_actions.getRequiredColumns().getNames(); columns.insert(prewhere_required_columns.begin(), prewhere_required_columns.end()); if (prewhere_info->row_level_filter) @@ -2227,7 +2229,7 @@ void InterpreterSelectQuery::addPrewhereAliasActions() if (prewhere_info) { NameSet columns_to_remove(columns_to_remove_after_prewhere.begin(), columns_to_remove_after_prewhere.end()); - Block prewhere_actions_result = prewhere_info->prewhere_actions->getResultColumns(); + Block prewhere_actions_result = prewhere_info->prewhere_actions.getResultColumns(); /// Populate required columns with the columns, added by PREWHERE actions and not removed afterwards. /// XXX: looks hacky that we already know which columns after PREWHERE we won't need for sure. @@ -2266,7 +2268,7 @@ void InterpreterSelectQuery::addPrewhereAliasActions() { /// Don't remove columns which are needed to be aliased. for (const auto & name : required_columns) - prewhere_info->prewhere_actions->tryRestoreColumn(name); + prewhere_info->prewhere_actions.tryRestoreColumn(name); /// Add physical columns required by prewhere actions. for (const auto & column : required_columns_from_prewhere) @@ -2324,7 +2326,7 @@ std::optional InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle if (analysis_result.hasPrewhere()) { auto & prewhere_info = analysis_result.prewhere_info; - filter_nodes.push_back(&prewhere_info->prewhere_actions->findInOutputs(prewhere_info->prewhere_column_name)); + filter_nodes.push_back(&prewhere_info->prewhere_actions.findInOutputs(prewhere_info->prewhere_column_name)); if (prewhere_info->row_level_filter) filter_nodes.push_back(&prewhere_info->row_level_filter->findInOutputs(prewhere_info->row_level_column_name)); @@ -2338,7 +2340,7 @@ std::optional InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle if (!filter_actions_dag) return {}; - return storage->totalRowsByPartitionPredicate(filter_actions_dag, context); + return storage->totalRowsByPartitionPredicate(*filter_actions_dag, context); } } @@ -2588,7 +2590,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc /// Aliases in table declaration. if (processing_stage == QueryProcessingStage::FetchColumns && alias_actions) { - auto table_aliases = std::make_unique(query_plan.getCurrentDataStream(), alias_actions); + auto table_aliases = std::make_unique(query_plan.getCurrentDataStream(), alias_actions->clone()); table_aliases->setStepDescription("Add table aliases"); query_plan.addStep(std::move(table_aliases)); } @@ -2598,7 +2600,7 @@ void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ActionsA { auto dag = expression->dag.clone(); if (expression->project_input) - dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + dag.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); auto where_step = std::make_unique( query_plan.getCurrentDataStream(), std::move(dag), getSelectQuery().where()->getColumnName(), remove_filter); @@ -2772,7 +2774,7 @@ void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const Actions { auto dag = expression->dag.clone(); if (expression->project_input) - dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + dag.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); auto having_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(dag), getSelectQuery().having()->getColumnName(), remove_filter); @@ -2785,7 +2787,7 @@ void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const Actions void InterpreterSelectQuery::executeTotalsAndHaving( QueryPlan & query_plan, bool has_having, const ActionsAndProjectInputsFlagPtr & expression, bool remove_filter, bool overflow_row, bool final) { - ActionsDAGPtr dag; + std::optional dag; if (expression) { dag = expression->dag.clone(); @@ -2839,7 +2841,7 @@ void InterpreterSelectQuery::executeExpression(QueryPlan & query_plan, const Act auto dag = expression->dag.clone(); if (expression->project_input) - dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + dag.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(dag)); diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index d4ed19d45ea..a01a390f3c7 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -26,7 +26,6 @@ class Logger; namespace DB { -class SubqueryForSet; class InterpreterSelectWithUnionQuery; class Context; class QueryPlan; @@ -240,7 +239,7 @@ private: Block source_header; /// Actions to calculate ALIAS if required. - ActionsDAGPtr alias_actions; + std::optional alias_actions; /// The subquery interpreter, if the subquery std::unique_ptr interpreter_subquery; diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index 0fca7b64d5a..e5549b2e539 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -1,9 +1,7 @@ #include #include -#include #include #include -#include #include #include #include @@ -94,7 +92,8 @@ QueryPipeline InterpreterShowCreateQuery::executeImpl() { auto & create = create_query->as(); create.uuid = UUIDHelpers::Nil; - create.to_inner_uuid = UUIDHelpers::Nil; + if (create.targets) + create.targets->resetInnerUUIDs(); } MutableColumnPtr column = ColumnString::create(); diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 480c6736bc5..57ad5caa4c7 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -1200,7 +1200,7 @@ void MutationsInterpreter::Source::read( const auto & names = first_stage.filter_column_names; size_t num_filters = names.size(); - ActionsDAGPtr filter; + std::optional filter; if (!first_stage.filter_column_names.empty()) { ActionsDAG::NodeRawConstPtrs nodes(num_filters); @@ -1214,7 +1214,7 @@ void MutationsInterpreter::Source::read( MergeTreeSequentialSourceType::Mutation, plan, *data, storage_snapshot, part, required_columns, - apply_deleted_mask_, filter, context_, + apply_deleted_mask_, std::move(filter), context_, getLogger("MutationsInterpreter")); } else @@ -1283,17 +1283,17 @@ QueryPipelineBuilder MutationsInterpreter::addStreamsForLaterStages(const std::v { auto dag = step->actions()->dag.clone(); if (step->actions()->project_input) - dag->appendInputsForUnusedColumns(plan.getCurrentDataStream().header); + dag.appendInputsForUnusedColumns(plan.getCurrentDataStream().header); /// Execute DELETEs. - plan.addStep(std::make_unique(plan.getCurrentDataStream(), dag, stage.filter_column_names[i], false)); + plan.addStep(std::make_unique(plan.getCurrentDataStream(), std::move(dag), stage.filter_column_names[i], false)); } else { auto dag = step->actions()->dag.clone(); if (step->actions()->project_input) - dag->appendInputsForUnusedColumns(plan.getCurrentDataStream().header); + dag.appendInputsForUnusedColumns(plan.getCurrentDataStream().header); /// Execute UPDATE or final projection. - plan.addStep(std::make_unique(plan.getCurrentDataStream(), dag)); + plan.addStep(std::make_unique(plan.getCurrentDataStream(), std::move(dag))); } } diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index 4821d607d0e..7296bdaf383 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -16,7 +17,6 @@ #include #include -#include #include #include #include @@ -29,6 +29,7 @@ #include #include + namespace DB { @@ -95,22 +96,22 @@ NamesAndTypesList getColumnsList(const ASTExpressionList * columns_definition) } ASTPtr data_type = declare_column->data_type; - auto * data_type_function = data_type->as(); + auto * data_type_node = data_type->as(); - if (data_type_function) + if (data_type_node) { - String type_name_upper = Poco::toUpper(data_type_function->name); + String type_name_upper = Poco::toUpper(data_type_node->name); if (is_unsigned) { /// For example(in MySQL): CREATE TABLE test(column_name INT NOT NULL ... UNSIGNED) if (type_name_upper.find("INT") != String::npos && !endsWith(type_name_upper, "SIGNED") && !endsWith(type_name_upper, "UNSIGNED")) - data_type_function->name = type_name_upper + " UNSIGNED"; + data_type_node->name = type_name_upper + " UNSIGNED"; } if (type_name_upper == "SET") - data_type_function->arguments.reset(); + data_type_node->arguments.reset(); /// Transforms MySQL ENUM's list of strings to ClickHouse string-integer pairs /// For example ENUM('a', 'b', 'c') -> ENUM('a'=1, 'b'=2, 'c'=3) @@ -119,7 +120,7 @@ NamesAndTypesList getColumnsList(const ASTExpressionList * columns_definition) if (type_name_upper.find("ENUM") != String::npos) { UInt16 i = 0; - for (ASTPtr & child : data_type_function->arguments->children) + for (ASTPtr & child : data_type_node->arguments->children) { auto new_child = std::make_shared(); new_child->name = "equals"; @@ -133,10 +134,10 @@ NamesAndTypesList getColumnsList(const ASTExpressionList * columns_definition) } if (type_name_upper == "DATE") - data_type_function->name = "Date32"; + data_type_node->name = "Date32"; } if (is_nullable) - data_type = makeASTFunction("Nullable", data_type); + data_type = makeASTDataType("Nullable", data_type); columns_name_and_type.emplace_back(declare_column->name, DataTypeFactory::instance().get(data_type)); } @@ -156,7 +157,7 @@ static ColumnsDescription createColumnsDescription(const NamesAndTypesList & col /// (see git blame for details). auto column_name_and_type = columns_name_and_type.begin(); const auto * declare_column_ast = columns_definition->children.begin(); - for (; column_name_and_type != columns_name_and_type.end(); column_name_and_type++, declare_column_ast++) + for (; column_name_and_type != columns_name_and_type.end(); ++column_name_and_type, ++declare_column_ast) { const auto & declare_column = (*declare_column_ast)->as(); String comment; @@ -182,7 +183,7 @@ static NamesAndTypesList getNames(const ASTFunction & expr, ContextPtr context, ASTPtr temp_ast = expr.clone(); auto syntax = TreeRewriter(context).analyze(temp_ast, columns); - auto required_columns = ExpressionAnalyzer(temp_ast, syntax, context).getActionsDAG(false)->getRequiredColumns(); + auto required_columns = ExpressionAnalyzer(temp_ast, syntax, context).getActionsDAG(false).getRequiredColumns(); return required_columns; } @@ -482,7 +483,7 @@ ASTs InterpreterCreateImpl::getRewrittenQueries( { auto column_declaration = std::make_shared(); column_declaration->name = name; - column_declaration->type = makeASTFunction(type); + column_declaration->type = makeASTDataType(type); column_declaration->default_specifier = "MATERIALIZED"; column_declaration->default_expression = std::make_shared(default_value); column_declaration->children.emplace_back(column_declaration->type); diff --git a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp index 6d6077a0295..81e6e6a8761 100644 --- a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp +++ b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp @@ -2,12 +2,10 @@ #include -#include #include #include #include #include -#include #include #include #include @@ -26,8 +24,8 @@ static inline ASTPtr tryRewrittenCreateQuery(const String & query, ContextPtr co context, "test_database", "test_database")[0]; } -static const char MATERIALIZEDMYSQL_TABLE_COLUMNS[] = ", `_sign` Int8() MATERIALIZED 1" - ", `_version` UInt64() MATERIALIZED 1" +static const char MATERIALIZEDMYSQL_TABLE_COLUMNS[] = ", `_sign` Int8 MATERIALIZED 1" + ", `_version` UInt64 MATERIALIZED 1" ", INDEX _version _version TYPE minmax GRANULARITY 1"; TEST(MySQLCreateRewritten, ColumnsDataType) diff --git a/src/Interpreters/SubstituteColumnOptimizer.cpp b/src/Interpreters/SubstituteColumnOptimizer.cpp index c4aef89fed2..925ded15857 100644 --- a/src/Interpreters/SubstituteColumnOptimizer.cpp +++ b/src/Interpreters/SubstituteColumnOptimizer.cpp @@ -13,10 +13,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} namespace { @@ -237,16 +233,8 @@ void SubstituteColumnOptimizer::perform() const auto & compare_graph = metadata_snapshot->getConstraints().getGraph(); - // Fill aliases - if (select_query->select()) - { - auto * list = select_query->refSelect()->as(); - if (!list) - throw Exception(ErrorCodes::LOGICAL_ERROR, "List of selected columns must be ASTExpressionList"); - - for (ASTPtr & ast : list->children) - ast->setAlias(ast->getAliasOrColumnName()); - } + if (compare_graph.getNumOfComponents() == 0) + return; auto run_for_all = [&](const auto func) { diff --git a/src/Interpreters/SubstituteColumnOptimizer.h b/src/Interpreters/SubstituteColumnOptimizer.h index 28aa8be0801..ecb65cd7707 100644 --- a/src/Interpreters/SubstituteColumnOptimizer.h +++ b/src/Interpreters/SubstituteColumnOptimizer.h @@ -15,7 +15,7 @@ struct StorageInMemoryMetadata; using StorageMetadataPtr = std::shared_ptr; /// Optimizer that tries to replace columns to equal columns (according to constraints) -/// with lower size (according to compressed and uncomressed size). +/// with lower size (according to compressed and uncompressed sizes). class SubstituteColumnOptimizer { public: diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 6191eb73fd4..c8c926db13c 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -462,19 +462,19 @@ static void makeColumnNameUnique(const ColumnsWithTypeAndName & source_columns, } } -static ActionsDAGPtr createWrapWithTupleActions( +static std::optional createWrapWithTupleActions( const ColumnsWithTypeAndName & source_columns, std::unordered_set && column_names_to_wrap, NameToNameMap & new_names) { if (column_names_to_wrap.empty()) - return nullptr; + return {}; - auto actions_dag = std::make_shared(source_columns); + ActionsDAG actions_dag(source_columns); FunctionOverloadResolverPtr func_builder = std::make_unique(std::make_shared()); - for (const auto * input_node : actions_dag->getInputs()) + for (const auto * input_node : actions_dag.getInputs()) { const auto & column_name = input_node->result_name; auto it = column_names_to_wrap.find(column_name); @@ -485,9 +485,9 @@ static ActionsDAGPtr createWrapWithTupleActions( String node_name = "__wrapNullsafe(" + column_name + ")"; makeColumnNameUnique(source_columns, node_name); - const auto & dst_node = actions_dag->addFunction(func_builder, {input_node}, node_name); + const auto & dst_node = actions_dag.addFunction(func_builder, {input_node}, node_name); new_names[column_name] = dst_node.result_name; - actions_dag->addOrReplaceInOutputs(dst_node); + actions_dag.addOrReplaceInOutputs(dst_node); } if (!column_names_to_wrap.empty()) @@ -537,21 +537,23 @@ std::pair TableJoin::getKeysForNullSafeComparion(const Columns return {left_keys_to_wrap, right_keys_to_wrap}; } -static void mergeDags(ActionsDAGPtr & result_dag, ActionsDAGPtr && new_dag) +static void mergeDags(std::optional & result_dag, std::optional && new_dag) { + if (!new_dag) + return; if (result_dag) result_dag->mergeInplace(std::move(*new_dag)); else result_dag = std::move(new_dag); } -std::pair +std::pair, std::optional> TableJoin::createConvertingActions( const ColumnsWithTypeAndName & left_sample_columns, const ColumnsWithTypeAndName & right_sample_columns) { - ActionsDAGPtr left_dag = nullptr; - ActionsDAGPtr right_dag = nullptr; + std::optional left_dag; + std::optional right_dag; /** If the types are not equal, we need to convert them to a common type. * Example: * SELECT * FROM t1 JOIN t2 ON t1.a = t2.b @@ -616,7 +618,7 @@ TableJoin::createConvertingActions( mergeDags(right_dag, std::move(new_right_dag)); } - return {left_dag, right_dag}; + return {std::move(left_dag), std::move(right_dag)}; } template @@ -693,7 +695,7 @@ void TableJoin::inferJoinKeyCommonType(const LeftNamesAndTypes & left, const Rig } } -static ActionsDAGPtr changeKeyTypes(const ColumnsWithTypeAndName & cols_src, +static std::optional changeKeyTypes(const ColumnsWithTypeAndName & cols_src, const TableJoin::NameToTypeMap & type_mapping, bool add_new_cols, NameToNameMap & key_column_rename) @@ -710,7 +712,7 @@ static ActionsDAGPtr changeKeyTypes(const ColumnsWithTypeAndName & cols_src, } } if (!has_some_to_do) - return nullptr; + return {}; return ActionsDAG::makeConvertingActions( /* source= */ cols_src, @@ -721,7 +723,7 @@ static ActionsDAGPtr changeKeyTypes(const ColumnsWithTypeAndName & cols_src, /* new_names= */ &key_column_rename); } -static ActionsDAGPtr changeTypesToNullable( +static std::optional changeTypesToNullable( const ColumnsWithTypeAndName & cols_src, const NameSet & exception_cols) { @@ -737,7 +739,7 @@ static ActionsDAGPtr changeTypesToNullable( } if (!has_some_to_do) - return nullptr; + return {}; return ActionsDAG::makeConvertingActions( /* source= */ cols_src, @@ -748,29 +750,29 @@ static ActionsDAGPtr changeTypesToNullable( /* new_names= */ nullptr); } -ActionsDAGPtr TableJoin::applyKeyConvertToTable( +std::optional TableJoin::applyKeyConvertToTable( const ColumnsWithTypeAndName & cols_src, const NameToTypeMap & type_mapping, JoinTableSide table_side, NameToNameMap & key_column_rename) { if (type_mapping.empty()) - return nullptr; + return {}; /// Create DAG to convert key columns - ActionsDAGPtr convert_dag = changeKeyTypes(cols_src, type_mapping, !hasUsing(), key_column_rename); + auto convert_dag = changeKeyTypes(cols_src, type_mapping, !hasUsing(), key_column_rename); applyRename(table_side, key_column_rename); return convert_dag; } -ActionsDAGPtr TableJoin::applyNullsafeWrapper( +std::optional TableJoin::applyNullsafeWrapper( const ColumnsWithTypeAndName & cols_src, const NameSet & columns_for_nullsafe_comparison, JoinTableSide table_side, NameToNameMap & key_column_rename) { if (columns_for_nullsafe_comparison.empty()) - return nullptr; + return {}; std::unordered_set column_names_to_wrap; for (const auto & name : columns_for_nullsafe_comparison) @@ -784,7 +786,7 @@ ActionsDAGPtr TableJoin::applyNullsafeWrapper( } /// Create DAG to wrap keys with tuple for null-safe comparison - ActionsDAGPtr null_safe_wrap_dag = createWrapWithTupleActions(cols_src, std::move(column_names_to_wrap), key_column_rename); + auto null_safe_wrap_dag = createWrapWithTupleActions(cols_src, std::move(column_names_to_wrap), key_column_rename); for (auto & clause : clauses) { for (size_t i : clause.nullsafe_compare_key_indexes) @@ -799,7 +801,7 @@ ActionsDAGPtr TableJoin::applyNullsafeWrapper( return null_safe_wrap_dag; } -ActionsDAGPtr TableJoin::applyJoinUseNullsConversion( +std::optional TableJoin::applyJoinUseNullsConversion( const ColumnsWithTypeAndName & cols_src, const NameToNameMap & key_column_rename) { @@ -809,8 +811,7 @@ ActionsDAGPtr TableJoin::applyJoinUseNullsConversion( exclude_columns.insert(it.second); /// Create DAG to make columns nullable if needed - ActionsDAGPtr add_nullable_dag = changeTypesToNullable(cols_src, exclude_columns); - return add_nullable_dag; + return changeTypesToNullable(cols_src, exclude_columns); } void TableJoin::setStorageJoin(std::shared_ptr storage) @@ -957,7 +958,7 @@ bool TableJoin::allowParallelHashJoin() const return true; } -ActionsDAGPtr TableJoin::createJoinedBlockActions(ContextPtr context) const +ActionsDAG TableJoin::createJoinedBlockActions(ContextPtr context) const { ASTPtr expression_list = rightKeysList(); auto syntax_result = TreeRewriter(context).analyze(expression_list, columnsFromJoinedTable()); diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index ed14fe4273c..3f2bebb5816 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -201,19 +201,19 @@ private: Names requiredJoinedNames() const; /// Create converting actions and change key column names if required - ActionsDAGPtr applyKeyConvertToTable( + std::optional applyKeyConvertToTable( const ColumnsWithTypeAndName & cols_src, const NameToTypeMap & type_mapping, JoinTableSide table_side, NameToNameMap & key_column_rename); - ActionsDAGPtr applyNullsafeWrapper( + std::optional applyNullsafeWrapper( const ColumnsWithTypeAndName & cols_src, const NameSet & columns_for_nullsafe_comparison, JoinTableSide table_side, NameToNameMap & key_column_rename); - ActionsDAGPtr applyJoinUseNullsConversion( + std::optional applyJoinUseNullsConversion( const ColumnsWithTypeAndName & cols_src, const NameToNameMap & key_column_rename); @@ -263,7 +263,7 @@ public: TemporaryDataOnDiskScopePtr getTempDataOnDisk() { return tmp_data; } - ActionsDAGPtr createJoinedBlockActions(ContextPtr context) const; + ActionsDAG createJoinedBlockActions(ContextPtr context) const; const std::vector & getEnabledJoinAlgorithms() const { return join_algorithm; } @@ -378,7 +378,7 @@ public: /// Calculate converting actions, rename key columns in required /// For `USING` join we will convert key columns inplace and affect into types in the result table /// For `JOIN ON` we will create new columns with converted keys to join by. - std::pair + std::pair, std::optional> createConvertingActions( const ColumnsWithTypeAndName & left_sample_columns, const ColumnsWithTypeAndName & right_sample_columns); diff --git a/src/Interpreters/WindowDescription.h b/src/Interpreters/WindowDescription.h index c26e4517c9a..d51d9ca94d8 100644 --- a/src/Interpreters/WindowDescription.h +++ b/src/Interpreters/WindowDescription.h @@ -14,7 +14,6 @@ namespace DB class ASTFunction; class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; struct WindowFunctionDescription { @@ -93,8 +92,8 @@ struct WindowDescription // then by ORDER BY. This field holds this combined sort order. SortDescription full_sort_description; - std::vector partition_by_actions; - std::vector order_by_actions; + std::vector> partition_by_actions; + std::vector> order_by_actions; WindowFrame frame; diff --git a/src/Interpreters/addMissingDefaults.cpp b/src/Interpreters/addMissingDefaults.cpp index fbf17d7efb7..27d79e86622 100644 --- a/src/Interpreters/addMissingDefaults.cpp +++ b/src/Interpreters/addMissingDefaults.cpp @@ -14,15 +14,15 @@ namespace DB { -ActionsDAGPtr addMissingDefaults( +ActionsDAG addMissingDefaults( const Block & header, const NamesAndTypesList & required_columns, const ColumnsDescription & columns, ContextPtr context, bool null_as_default) { - auto actions = std::make_shared(header.getColumnsWithTypeAndName()); - auto & index = actions->getOutputs(); + ActionsDAG actions(header.getColumnsWithTypeAndName()); + auto & index = actions.getOutputs(); /// For missing columns of nested structure, you need to create not a column of empty arrays, but a column of arrays of correct lengths. /// First, remember the offset columns for all arrays in the block. @@ -40,7 +40,7 @@ ActionsDAGPtr addMissingDefaults( if (group.empty()) group.push_back(nullptr); - group.push_back(actions->getInputs()[i]); + group.push_back(actions.getInputs()[i]); } } @@ -62,11 +62,11 @@ ActionsDAGPtr addMissingDefaults( { const auto & nested_type = array_type->getNestedType(); ColumnPtr nested_column = nested_type->createColumnConstWithDefaultValue(0); - const auto & constant = actions->addColumn({nested_column, nested_type, column.name}); + const auto & constant = actions.addColumn({nested_column, nested_type, column.name}); auto & group = nested_groups[offsets_name]; group[0] = &constant; - index.push_back(&actions->addFunction(func_builder_replicate, group, constant.result_name)); + index.push_back(&actions.addFunction(func_builder_replicate, group, constant.result_name)); continue; } @@ -75,17 +75,17 @@ ActionsDAGPtr addMissingDefaults( * it can be full (or the interpreter may decide that it is constant everywhere). */ auto new_column = column.type->createColumnConstWithDefaultValue(0); - const auto * col = &actions->addColumn({new_column, column.type, column.name}); - index.push_back(&actions->materializeNode(*col)); + const auto * col = &actions.addColumn({new_column, column.type, column.name}); + index.push_back(&actions.materializeNode(*col)); } /// Computes explicitly specified values by default and materialized columns. - if (auto dag = evaluateMissingDefaults(actions->getResultColumns(), required_columns, columns, context, true, null_as_default)) - actions = ActionsDAG::merge(std::move(*actions), std::move(*dag)); + if (auto dag = evaluateMissingDefaults(actions.getResultColumns(), required_columns, columns, context, true, null_as_default)) + actions = ActionsDAG::merge(std::move(actions), std::move(*dag)); /// Removes unused columns and reorders result. - actions->removeUnusedActions(required_columns.getNames(), false); - actions->addMaterializingOutputActions(); + actions.removeUnusedActions(required_columns.getNames(), false); + actions.addMaterializingOutputActions(); return actions; } diff --git a/src/Interpreters/addMissingDefaults.h b/src/Interpreters/addMissingDefaults.h index 0a3d4de478c..551583a0006 100644 --- a/src/Interpreters/addMissingDefaults.h +++ b/src/Interpreters/addMissingDefaults.h @@ -2,11 +2,6 @@ #include -#include -#include -#include - - namespace DB { @@ -15,7 +10,6 @@ class NamesAndTypesList; class ColumnsDescription; class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; /** Adds three types of columns into block * 1. Columns, that are missed inside request, but present in table without defaults (missed columns) @@ -24,7 +18,7 @@ using ActionsDAGPtr = std::shared_ptr; * Also can substitute NULL with DEFAULT value in case of INSERT SELECT query (null_as_default) if according setting is 1. * All three types of columns are materialized (not constants). */ -ActionsDAGPtr addMissingDefaults( +ActionsDAG addMissingDefaults( const Block & header, const NamesAndTypesList & required_columns, const ColumnsDescription & columns, ContextPtr context, bool null_as_default = false); } diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 184c263dbdb..1a7c166c6a5 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -356,7 +356,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID for (size_t i = 0; i < src_arr_size; ++i) { res[i] = convertFieldToType(src_arr[i], element_type); - if (res[i].isNull() && !element_type.isNullable()) + if (res[i].isNull() && !canContainNull(element_type)) { // See the comment for Tuples below. have_unconvertible_element = true; @@ -384,25 +384,25 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID { const auto & element_type = *(type_tuple->getElements()[i]); res[i] = convertFieldToType(src_tuple[i], element_type); - if (!res[i].isNull() || element_type.isNullable()) - continue; - - /* - * Either the source element was Null, or the conversion did not - * succeed, because the source and the requested types of the - * element are compatible, but the value is not convertible - * (e.g. trying to convert -1 from Int8 to UInt8). In these - * cases, consider the whole tuple also compatible but not - * convertible. According to the specification of this function, - * we must return Null in this case. - * - * The following elements might be not even compatible, so it - * makes sense to check them to detect user errors. Remember - * that there is an unconvertible element, and try to process - * the remaining ones. The convertFieldToType for each element - * will throw if it detects incompatibility. - */ - have_unconvertible_element = true; + if (res[i].isNull() && !canContainNull(element_type)) + { + /* + * Either the source element was Null, or the conversion did not + * succeed, because the source and the requested types of the + * element are compatible, but the value is not convertible + * (e.g. trying to convert -1 from Int8 to UInt8). In these + * cases, consider the whole tuple also compatible but not + * convertible. According to the specification of this function, + * we must return Null in this case. + * + * The following elements might be not even compatible, so it + * makes sense to check them to detect user errors. Remember + * that there is an unconvertible element, and try to process + * the remaining ones. The convertFieldToType for each element + * will throw if it detects incompatibility. + */ + have_unconvertible_element = true; + } } return have_unconvertible_element ? Field(Null()) : Field(res); @@ -433,11 +433,11 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID updated_entry[0] = convertFieldToType(key, key_type); - if (updated_entry[0].isNull() && !key_type.isNullable()) + if (updated_entry[0].isNull() && !canContainNull(key_type)) have_unconvertible_element = true; updated_entry[1] = convertFieldToType(value, value_type); - if (updated_entry[1].isNull() && !value_type.isNullable()) + if (updated_entry[1].isNull() && !canContainNull(value_type)) have_unconvertible_element = true; res[i] = updated_entry; @@ -545,7 +545,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID catch (Exception & e) { if (e.code() == ErrorCodes::UNEXPECTED_DATA_AFTER_PARSED_VALUE) - throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string {} to type {}", src.get(), type.getName()); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string '{}' to type {}", src.get(), type.getName()); e.addMessage(fmt::format("while converting '{}' to {}", src.get(), type.getName())); throw; @@ -592,7 +592,7 @@ Field convertFieldToType(const Field & from_value, const IDataType & to_type, co Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_type, const IDataType * from_type_hint) { bool is_null = from_value.isNull(); - if (is_null && !to_type.isNullable() && !to_type.isLowCardinalityNullable()) + if (is_null && !canContainNull(to_type)) throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert NULL to {}", to_type.getName()); Field converted = convertFieldToType(from_value, to_type, from_type_hint); diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index ac80ad2fb68..4bfc80af1fe 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -91,7 +91,7 @@ std::optional evaluateConstantExpressionImpl(c ColumnPtr result_column; DataTypePtr result_type; String result_name = ast->getColumnName(); - for (const auto & action_node : actions->getOutputs()) + for (const auto & action_node : actions.getOutputs()) { if ((action_node->result_name == result_name) && action_node->column) { @@ -679,9 +679,9 @@ std::optional evaluateExpressionOverConstantCondition( size_t max_elements) { auto inverted_dag = KeyCondition::cloneASTWithInversionPushDown({predicate}, context); - auto matches = matchTrees(expr, *inverted_dag, false); + auto matches = matchTrees(expr, inverted_dag, false); - auto predicates = analyze(inverted_dag->getOutputs().at(0), matches, context, max_elements); + auto predicates = analyze(inverted_dag.getOutputs().at(0), matches, context, max_elements); if (!predicates) return {}; diff --git a/src/Interpreters/examples/CMakeLists.txt b/src/Interpreters/examples/CMakeLists.txt index 8bb7f9eeb98..4b1ec970b26 100644 --- a/src/Interpreters/examples/CMakeLists.txt +++ b/src/Interpreters/examples/CMakeLists.txt @@ -2,34 +2,34 @@ clickhouse_add_executable (hash_map hash_map.cpp) target_link_libraries (hash_map PRIVATE dbms clickhouse_functions ch_contrib::sparsehash) clickhouse_add_executable (hash_map_lookup hash_map_lookup.cpp) -target_link_libraries (hash_map_lookup PRIVATE clickhouse_common_io clickhouse_compression) +target_link_libraries (hash_map_lookup PRIVATE clickhouse_common_io clickhouse_common_config clickhouse_compression) clickhouse_add_executable (hash_map3 hash_map3.cpp) -target_link_libraries (hash_map3 PRIVATE clickhouse_common_io clickhouse_compression ch_contrib::farmhash ch_contrib::metrohash) +target_link_libraries (hash_map3 PRIVATE clickhouse_common_io clickhouse_common_config clickhouse_compression ch_contrib::farmhash ch_contrib::metrohash) clickhouse_add_executable (hash_map_string hash_map_string.cpp) -target_link_libraries (hash_map_string PRIVATE clickhouse_common_io clickhouse_compression ch_contrib::sparsehash) +target_link_libraries (hash_map_string PRIVATE clickhouse_common_io clickhouse_common_config clickhouse_compression ch_contrib::sparsehash) clickhouse_add_executable (hash_map_string_2 hash_map_string_2.cpp) -target_link_libraries (hash_map_string_2 PRIVATE clickhouse_common_io clickhouse_compression) +target_link_libraries (hash_map_string_2 PRIVATE clickhouse_common_io clickhouse_common_config clickhouse_compression) clickhouse_add_executable (hash_map_string_3 hash_map_string_3.cpp) -target_link_libraries (hash_map_string_3 PRIVATE clickhouse_common_io clickhouse_compression ch_contrib::farmhash ch_contrib::metrohash) +target_link_libraries (hash_map_string_3 PRIVATE clickhouse_common_io clickhouse_common_config clickhouse_compression ch_contrib::farmhash ch_contrib::metrohash) clickhouse_add_executable (hash_map_string_small hash_map_string_small.cpp) -target_link_libraries (hash_map_string_small PRIVATE clickhouse_common_io clickhouse_compression ch_contrib::sparsehash) +target_link_libraries (hash_map_string_small PRIVATE clickhouse_common_io clickhouse_common_config clickhouse_compression ch_contrib::sparsehash) clickhouse_add_executable (string_hash_map string_hash_map.cpp) -target_link_libraries (string_hash_map PRIVATE clickhouse_common_io clickhouse_compression ch_contrib::sparsehash) +target_link_libraries (string_hash_map PRIVATE clickhouse_common_io clickhouse_common_config clickhouse_compression ch_contrib::sparsehash) clickhouse_add_executable (string_hash_map_aggregation string_hash_map.cpp) -target_link_libraries (string_hash_map_aggregation PRIVATE clickhouse_common_io clickhouse_compression) +target_link_libraries (string_hash_map_aggregation PRIVATE clickhouse_common_io clickhouse_common_config clickhouse_compression) clickhouse_add_executable (string_hash_set string_hash_set.cpp) -target_link_libraries (string_hash_set PRIVATE clickhouse_common_io clickhouse_compression) +target_link_libraries (string_hash_set PRIVATE clickhouse_common_io clickhouse_common_config clickhouse_compression) clickhouse_add_executable (two_level_hash_map two_level_hash_map.cpp) -target_link_libraries (two_level_hash_map PRIVATE clickhouse_common_io clickhouse_compression ch_contrib::sparsehash) +target_link_libraries (two_level_hash_map PRIVATE clickhouse_common_io clickhouse_common_config clickhouse_compression ch_contrib::sparsehash) clickhouse_add_executable (jit_example jit_example.cpp) target_link_libraries (jit_example PRIVATE dbms) diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 5d237d28089..1b57ad2b622 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -538,7 +538,7 @@ Chunk DDLQueryStatusSource::generate() ExecutionStatus status(-1, "Cannot obtain error message"); /// Replicated database retries in case of error, it should not write error status. -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD bool need_check_status = true; #else bool need_check_status = !is_replicated_database; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index d9d3ba58160..ba6fc0f14a0 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -1048,14 +1048,14 @@ static std::tuple executeQueryImpl( /// In case when the client had to retry some mini-INSERTs then they will be properly deduplicated /// by the source tables. This functionality is controlled by a setting `async_insert_deduplicate`. /// But then they will be glued together into a block and pushed through a chain of Materialized Views if any. - /// The process of forming such blocks is not deteministic so each time we retry mini-INSERTs the resulting + /// The process of forming such blocks is not deterministic so each time we retry mini-INSERTs the resulting /// block may be concatenated differently. /// That's why deduplication in dependent Materialized Views doesn't make sense in presence of async INSERTs. if (settings.throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert && settings.deduplicate_blocks_in_dependent_materialized_views) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, - "Deduplication is dependent materialized view cannot work together with async inserts. "\ - "Please disable eiher `deduplicate_blocks_in_dependent_materialized_views` or `async_insert` setting."); + "Deduplication in dependent materialized view cannot work together with async inserts. "\ + "Please disable either `deduplicate_blocks_in_dependent_materialized_views` or `async_insert` setting."); quota = context->getQuota(); if (quota) diff --git a/src/Interpreters/formatWithPossiblyHidingSecrets.h b/src/Interpreters/formatWithPossiblyHidingSecrets.h index 039bcbc2bca..ea8c295b169 100644 --- a/src/Interpreters/formatWithPossiblyHidingSecrets.h +++ b/src/Interpreters/formatWithPossiblyHidingSecrets.h @@ -1,11 +1,14 @@ #pragma once -#include "Access/ContextAccess.h" -#include "Interpreters/Context.h" + +#include +#include + #include namespace DB { + struct SecretHidingFormatSettings { // We can't store const Context& as there's a dangerous usage {.ctx = *getContext()} @@ -24,4 +27,5 @@ inline String format(const SecretHidingFormatSettings & settings) return settings.query.formatWithPossiblyHidingSensitiveData(settings.max_length, settings.one_line, show_secrets); } + } diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp index 239cce5b427..62f8aea86d1 100644 --- a/src/Interpreters/inplaceBlockConversions.cpp +++ b/src/Interpreters/inplaceBlockConversions.cpp @@ -152,22 +152,20 @@ ASTPtr convertRequiredExpressions(Block & block, const NamesAndTypesList & requi return conversion_expr_list; } -ActionsDAGPtr createExpressions( +std::optional createExpressions( const Block & header, ASTPtr expr_list, bool save_unneeded_columns, ContextPtr context) { if (!expr_list) - return nullptr; + return {}; auto syntax_result = TreeRewriter(context).analyze(expr_list, header.getNamesAndTypesList()); auto expression_analyzer = ExpressionAnalyzer{expr_list, syntax_result, context}; - auto dag = std::make_shared(header.getNamesAndTypesList()); + ActionsDAG dag(header.getNamesAndTypesList()); auto actions = expression_analyzer.getActionsDAG(true, !save_unneeded_columns); - dag = ActionsDAG::merge(std::move(*dag), std::move(*actions)); - - return dag; + return ActionsDAG::merge(std::move(dag), std::move(actions)); } } @@ -180,7 +178,7 @@ void performRequiredConversions(Block & block, const NamesAndTypesList & require if (auto dag = createExpressions(block, conversion_expr_list, true, context)) { - auto expression = std::make_shared(std::move(dag), ExpressionActionsSettings::fromContext(context)); + auto expression = std::make_shared(std::move(*dag), ExpressionActionsSettings::fromContext(context)); expression->execute(block); } } @@ -195,7 +193,7 @@ bool needConvertAnyNullToDefault(const Block & header, const NamesAndTypesList & return false; } -ActionsDAGPtr evaluateMissingDefaults( +std::optional evaluateMissingDefaults( const Block & header, const NamesAndTypesList & required_columns, const ColumnsDescription & columns, @@ -204,7 +202,7 @@ ActionsDAGPtr evaluateMissingDefaults( bool null_as_default) { if (!columns.hasDefaults() && (!null_as_default || !needConvertAnyNullToDefault(header, required_columns, columns))) - return nullptr; + return {}; ASTPtr expr_list = defaultRequiredExpressions(header, required_columns, columns, null_as_default); return createExpressions(header, expr_list, save_unneeded_columns, context); diff --git a/src/Interpreters/inplaceBlockConversions.h b/src/Interpreters/inplaceBlockConversions.h index bea44bf6db9..570eb75dd4a 100644 --- a/src/Interpreters/inplaceBlockConversions.h +++ b/src/Interpreters/inplaceBlockConversions.h @@ -5,9 +5,6 @@ #include #include -#include -#include - namespace DB { @@ -24,12 +21,11 @@ struct StorageInMemoryMetadata; using StorageMetadataPtr = std::shared_ptr; class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; /// Create actions which adds missing defaults to block according to required_columns using columns description /// or substitute NULL into DEFAULT value in case of INSERT SELECT query (null_as_default) if according setting is 1. /// Return nullptr if no actions required. -ActionsDAGPtr evaluateMissingDefaults( +std::optional evaluateMissingDefaults( const Block & header, const NamesAndTypesList & required_columns, const ColumnsDescription & columns, diff --git a/src/Interpreters/tests/gtest_convertFieldToType.cpp b/src/Interpreters/tests/gtest_convertFieldToType.cpp index c8a9d5aa2c0..0cac9a3b59d 100644 --- a/src/Interpreters/tests/gtest_convertFieldToType.cpp +++ b/src/Interpreters/tests/gtest_convertFieldToType.cpp @@ -147,7 +147,7 @@ INSTANTIATE_TEST_SUITE_P( DecimalField(DateTime64(123 * Day * 1'000'000), 6) } }) - ); +); INSTANTIATE_TEST_SUITE_P( DateTimeToDateTime64, @@ -179,3 +179,84 @@ INSTANTIATE_TEST_SUITE_P( }, }) ); + +INSTANTIATE_TEST_SUITE_P( + StringToNumber, + ConvertFieldToTypeTest, + ::testing::ValuesIn(std::initializer_list{ + { + "String", + Field("1"), + "Int8", + Field(1) + }, + { + "String", + Field("256"), + "Int8", + Field() + }, + { + "String", + Field("not a number"), + "Int8", + {} + }, + { + "String", + Field("1.1"), + "Int8", + {} /// we can not convert '1.1' to Int8 + }, + { + "String", + Field("1.1"), + "Float64", + Field(1.1) + }, + }) +); + +INSTANTIATE_TEST_SUITE_P( + NumberToString, + ConvertFieldToTypeTest, + ::testing::ValuesIn(std::initializer_list{ + { + "Int8", + Field(1), + "String", + Field("1") + }, + { + "Int8", + Field(-1), + "String", + Field("-1") + }, + { + "Float64", + Field(1.1), + "String", + Field("1.1") + }, + }) +); + +INSTANTIATE_TEST_SUITE_P( + StringToDate, + ConvertFieldToTypeTest, + ::testing::ValuesIn(std::initializer_list{ + { + "String", + Field("2024-07-12"), + "Date", + Field(static_cast(19916)) + }, + { + "String", + Field("not a date"), + "Date", + {} + }, + }) +); diff --git a/src/Loggers/OwnSplitChannel.cpp b/src/Loggers/OwnSplitChannel.cpp index dc51a13e01f..c1594361b2c 100644 --- a/src/Loggers/OwnSplitChannel.cpp +++ b/src/Loggers/OwnSplitChannel.cpp @@ -18,6 +18,9 @@ namespace DB void OwnSplitChannel::log(const Poco::Message & msg) { + if (!isLoggingEnabled()) + return; + #ifndef WITHOUT_TEXT_LOG auto logs_queue = CurrentThread::getInternalTextLogsQueue(); diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index 4a8a3d2967d..c96499095d5 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -1,8 +1,6 @@ #include #include #include -#include -#include namespace DB @@ -15,8 +13,6 @@ ASTPtr ASTColumnDeclaration::clone() const if (type) { - // Type may be an ASTFunction (e.g. `create table t (a Decimal(9,0))`), - // so we have to clone it properly as well. res->type = type->clone(); res->children.push_back(res->type); } diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index 6dc009da9a8..f0f782c0a63 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include #include #include @@ -240,12 +242,12 @@ ASTPtr ASTCreateQuery::clone() const res->set(res->columns_list, columns_list->clone()); if (storage) res->set(res->storage, storage->clone()); - if (inner_storage) - res->set(res->inner_storage, inner_storage->clone()); if (select) res->set(res->select, select->clone()); if (table_overrides) res->set(res->table_overrides, table_overrides->clone()); + if (targets) + res->set(res->targets, targets->clone()); if (dictionary) { @@ -398,20 +400,18 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat refresh_strategy->formatImpl(settings, state, frame); } - if (to_table_id) + if (auto to_table_id = getTargetTableID(ViewTarget::To)) { - assert((is_materialized_view || is_window_view) && to_inner_uuid == UUIDHelpers::Nil); - settings.ostr - << (settings.hilite ? hilite_keyword : "") << " TO " << (settings.hilite ? hilite_none : "") - << (!to_table_id.database_name.empty() ? backQuoteIfNeed(to_table_id.database_name) + "." : "") - << backQuoteIfNeed(to_table_id.table_name); + settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << toStringView(Keyword::TO) + << (settings.hilite ? hilite_none : "") << " " + << (!to_table_id.database_name.empty() ? backQuoteIfNeed(to_table_id.database_name) + "." : "") + << backQuoteIfNeed(to_table_id.table_name); } - if (to_inner_uuid != UUIDHelpers::Nil) + if (auto to_inner_uuid = getTargetInnerUUID(ViewTarget::To); to_inner_uuid != UUIDHelpers::Nil) { - assert(is_materialized_view && !to_table_id); - settings.ostr << (settings.hilite ? hilite_keyword : "") << " TO INNER UUID " << (settings.hilite ? hilite_none : "") - << quoteString(toString(to_inner_uuid)); + settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << toStringView(Keyword::TO_INNER_UUID) + << (settings.hilite ? hilite_none : "") << " " << quoteString(toString(to_inner_uuid)); } bool should_add_empty = is_create_empty; @@ -471,14 +471,17 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat frame.expression_list_always_start_on_new_line = false; - if (inner_storage) + if (storage) + storage->formatImpl(settings, state, frame); + + if (auto inner_storage = getTargetInnerEngine(ViewTarget::Inner)) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " INNER" << (settings.hilite ? hilite_none : ""); + settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << toStringView(Keyword::INNER) << (settings.hilite ? hilite_none : ""); inner_storage->formatImpl(settings, state, frame); } - if (storage) - storage->formatImpl(settings, state, frame); + if (auto to_storage = getTargetInnerEngine(ViewTarget::To)) + to_storage->formatImpl(settings, state, frame); if (dictionary) dictionary->formatImpl(settings, state, frame); @@ -538,48 +541,57 @@ bool ASTCreateQuery::isParameterizedView() const } -ASTCreateQuery::UUIDs::UUIDs(const ASTCreateQuery & query) - : uuid(query.uuid) - , to_inner_uuid(query.to_inner_uuid) +void ASTCreateQuery::generateRandomUUIDs() { + CreateQueryUUIDs{*this, /* generate_random= */ true}.copyToQuery(*this); } -String ASTCreateQuery::UUIDs::toString() const +void ASTCreateQuery::resetUUIDs() { - WriteBufferFromOwnString out; - out << "{" << uuid << "," << to_inner_uuid << "}"; - return out.str(); + CreateQueryUUIDs{}.copyToQuery(*this); } -ASTCreateQuery::UUIDs ASTCreateQuery::UUIDs::fromString(const String & str) + +StorageID ASTCreateQuery::getTargetTableID(ViewTarget::Kind target_kind) const { - ReadBufferFromString in{str}; - ASTCreateQuery::UUIDs res; - in >> "{" >> res.uuid >> "," >> res.to_inner_uuid >> "}"; - return res; + if (targets) + return targets->getTableID(target_kind); + return StorageID::createEmpty(); } -ASTCreateQuery::UUIDs ASTCreateQuery::generateRandomUUID(bool always_generate_new_uuid) +bool ASTCreateQuery::hasTargetTableID(ViewTarget::Kind target_kind) const { - if (always_generate_new_uuid) - setUUID({}); - - if (uuid == UUIDHelpers::Nil) - uuid = UUIDHelpers::generateV4(); - - /// If destination table (to_table_id) is not specified for materialized view, - /// then MV will create inner table. We should generate UUID of inner table here. - bool need_uuid_for_inner_table = !attach && is_materialized_view && !to_table_id; - if (need_uuid_for_inner_table && (to_inner_uuid == UUIDHelpers::Nil)) - to_inner_uuid = UUIDHelpers::generateV4(); - - return UUIDs{*this}; + if (targets) + return targets->hasTableID(target_kind); + return false; } -void ASTCreateQuery::setUUID(const UUIDs & uuids) +UUID ASTCreateQuery::getTargetInnerUUID(ViewTarget::Kind target_kind) const { - uuid = uuids.uuid; - to_inner_uuid = uuids.to_inner_uuid; + if (targets) + return targets->getInnerUUID(target_kind); + return UUIDHelpers::Nil; +} + +bool ASTCreateQuery::hasInnerUUIDs() const +{ + if (targets) + return targets->hasInnerUUIDs(); + return false; +} + +std::shared_ptr ASTCreateQuery::getTargetInnerEngine(ViewTarget::Kind target_kind) const +{ + if (targets) + return targets->getInnerEngine(target_kind); + return nullptr; +} + +void ASTCreateQuery::setTargetInnerEngine(ViewTarget::Kind target_kind, ASTPtr storage_def) +{ + if (!targets) + set(targets, std::make_shared()); + targets->setInnerEngine(target_kind, storage_def); } } diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index 9e4364b1f25..a95010aea31 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -15,6 +16,7 @@ namespace DB class ASTFunction; class ASTSetQuery; class ASTSelectWithUnionQuery; +struct CreateQueryUUIDs; class ASTStorage : public IAST @@ -101,17 +103,15 @@ public: bool has_uuid{false}; // CREATE TABLE x UUID '...' ASTColumns * columns_list = nullptr; - - StorageID to_table_id = StorageID::createEmpty(); /// For CREATE MATERIALIZED VIEW mv TO table. - UUID to_inner_uuid = UUIDHelpers::Nil; /// For materialized view with inner table - ASTStorage * inner_storage = nullptr; /// For window view with inner table ASTStorage * storage = nullptr; + ASTPtr watermark_function; ASTPtr lateness_function; String as_database; String as_table; IAST * as_table_function = nullptr; ASTSelectWithUnionQuery * select = nullptr; + ASTViewTargets * targets = nullptr; IAST * comment = nullptr; ASTPtr sql_security = nullptr; @@ -153,17 +153,26 @@ public: QueryKind getQueryKind() const override { return QueryKind::Create; } - struct UUIDs - { - UUID uuid = UUIDHelpers::Nil; - UUID to_inner_uuid = UUIDHelpers::Nil; - UUIDs() = default; - explicit UUIDs(const ASTCreateQuery & query); - String toString() const; - static UUIDs fromString(const String & str); - }; - UUIDs generateRandomUUID(bool always_generate_new_uuid = false); - void setUUID(const UUIDs & uuids); + /// Generates a random UUID for this create query if it's not specified already. + /// The function also generates random UUIDs for inner target tables if this create query implies that + /// (for example, if it's a `CREATE MATERIALIZED VIEW` query with an inner storage). + void generateRandomUUIDs(); + + /// Removes UUID from this create query. + /// The function also removes UUIDs for inner target tables from this create query (see also generateRandomUUID()). + void resetUUIDs(); + + /// Returns information about a target table. + /// If that information isn't specified in this create query (or even not allowed) then the function returns an empty value. + StorageID getTargetTableID(ViewTarget::Kind target_kind) const; + bool hasTargetTableID(ViewTarget::Kind target_kind) const; + UUID getTargetInnerUUID(ViewTarget::Kind target_kind) const; + bool hasInnerUUIDs() const; + std::shared_ptr getTargetInnerEngine(ViewTarget::Kind target_kind) const; + void setTargetInnerEngine(ViewTarget::Kind target_kind, ASTPtr storage_def); + + bool is_materialized_view_with_external_target() const { return is_materialized_view && hasTargetTableID(ViewTarget::To); } + bool is_materialized_view_with_inner_table() const { return is_materialized_view && !hasTargetTableID(ViewTarget::To); } protected: void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; @@ -171,8 +180,8 @@ protected: void forEachPointerToChild(std::function f) override { f(reinterpret_cast(&columns_list)); - f(reinterpret_cast(&inner_storage)); f(reinterpret_cast(&storage)); + f(reinterpret_cast(&targets)); f(reinterpret_cast(&as_table_function)); f(reinterpret_cast(&select)); f(reinterpret_cast(&comment)); diff --git a/src/Parsers/ASTDataType.cpp b/src/Parsers/ASTDataType.cpp new file mode 100644 index 00000000000..3c17ae8c380 --- /dev/null +++ b/src/Parsers/ASTDataType.cpp @@ -0,0 +1,57 @@ +#include +#include +#include + + +namespace DB +{ + +String ASTDataType::getID(char delim) const +{ + return "DataType" + (delim + name); +} + +ASTPtr ASTDataType::clone() const +{ + auto res = std::make_shared(*this); + res->children.clear(); + + if (arguments) + { + res->arguments = arguments->clone(); + res->children.push_back(res->arguments); + } + + return res; +} + +void ASTDataType::updateTreeHashImpl(SipHash & hash_state, bool) const +{ + hash_state.update(name.size()); + hash_state.update(name); + /// Children are hashed automatically. +} + +void ASTDataType::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + settings.ostr << (settings.hilite ? hilite_function : "") << name; + + if (arguments && !arguments->children.empty()) + { + settings.ostr << '(' << (settings.hilite ? hilite_none : ""); + + for (size_t i = 0, size = arguments->children.size(); i < size; ++i) + { + if (i != 0) + settings.ostr << ", "; + + arguments->children[i]->formatImpl(settings, state, frame); + } + + settings.ostr << (settings.hilite ? hilite_function : "") << ')'; + } + + settings.ostr << (settings.hilite ? hilite_none : ""); +} + +} diff --git a/src/Parsers/ASTDataType.h b/src/Parsers/ASTDataType.h new file mode 100644 index 00000000000..71d3aeaa4eb --- /dev/null +++ b/src/Parsers/ASTDataType.h @@ -0,0 +1,38 @@ +#pragma once + +#include + + +namespace DB +{ + +/// AST for data types, e.g. UInt8 or Tuple(x UInt8, y Enum(a = 1)) +class ASTDataType : public IAST +{ +public: + String name; + ASTPtr arguments; + + String getID(char delim) const override; + ASTPtr clone() const override; + void updateTreeHashImpl(SipHash & hash_state, bool ignore_aliases) const override; + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; +}; + +template +std::shared_ptr makeASTDataType(const String & name, Args &&... args) +{ + auto data_type = std::make_shared(); + data_type->name = name; + + if constexpr (sizeof...(args)) + { + data_type->arguments = std::make_shared(); + data_type->children.push_back(data_type->arguments); + data_type->arguments->children = { std::forward(args)... }; + } + + return data_type; +} + +} diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index b04ec1c22b2..230d4c778e8 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -329,19 +329,23 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format const auto * literal = arguments->children[0]->as(); const auto * function = arguments->children[0]->as(); + const auto * subquery = arguments->children[0]->as(); bool is_tuple = literal && literal->value.getType() == Field::Types::Tuple; - // do not add parentheses for tuple literal, otherwise extra parens will be added `-((3, 7, 3), 1)` -> `-(((3, 7, 3), 1))` + /// Do not add parentheses for tuple literal, otherwise extra parens will be added `-((3, 7, 3), 1)` -> `-(((3, 7, 3), 1))` bool literal_need_parens = literal && !is_tuple; - // negate always requires parentheses, otherwise -(-1) will be printed as --1 - bool inside_parens = name == "negate" && (literal_need_parens || (function && function->name == "negate")); + /// Negate always requires parentheses, otherwise -(-1) will be printed as --1 + /// Also extra parentheses are needed for subqueries, because NOT can be parsed as a function: + /// not(SELECT 1) cannot be parsed, while not((SELECT 1)) can. + bool inside_parens = (name == "negate" && (literal_need_parens || (function && function->name == "negate"))) + || (subquery && name == "not"); /// We DO need parentheses around a single literal /// For example, SELECT (NOT 0) + (NOT 0) cannot be transformed into SELECT NOT 0 + NOT 0, since /// this is equal to SELECT NOT (0 + NOT 0) bool outside_parens = frame.need_parens && !inside_parens; - // do not add extra parentheses for functions inside negate, i.e. -(-toUInt64(-(1))) + /// Do not add extra parentheses for functions inside negate, i.e. -(-toUInt64(-(1))) if (inside_parens) nested_need_parens.need_parens = false; diff --git a/src/Parsers/ASTFunction.h b/src/Parsers/ASTFunction.h index 3a94691f25d..be2b6beae54 100644 --- a/src/Parsers/ASTFunction.h +++ b/src/Parsers/ASTFunction.h @@ -46,7 +46,7 @@ public: NullsAction nulls_action = NullsAction::EMPTY; - /// do not print empty parentheses if there are no args - compatibility with new AST for data types and engine names. + /// do not print empty parentheses if there are no args - compatibility with engine names. bool no_empty_args = false; /// Specifies where this function-like expression is used. diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index d22a4eca0fc..b6d42513aa7 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -235,7 +235,13 @@ void ASTTableJoin::formatImplAfterTable(const FormatSettings & settings, FormatS else if (on_expression) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " ON " << (settings.hilite ? hilite_none : ""); + /// If there is an alias for the whole expression parens should be added, otherwise it will be invalid syntax + bool on_has_alias = !on_expression->tryGetAlias().empty(); + if (on_has_alias) + settings.ostr << "("; on_expression->formatImpl(settings, state, frame); + if (on_has_alias) + settings.ostr << ")"; } } diff --git a/src/Parsers/ASTViewTargets.cpp b/src/Parsers/ASTViewTargets.cpp new file mode 100644 index 00000000000..8ee98e704df --- /dev/null +++ b/src/Parsers/ASTViewTargets.cpp @@ -0,0 +1,300 @@ +#include + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +} + + +std::string_view toString(ViewTarget::Kind kind) +{ + switch (kind) + { + case ViewTarget::To: return "to"; + case ViewTarget::Inner: return "inner"; + } + throw Exception(ErrorCodes::LOGICAL_ERROR, "{} doesn't support kind {}", __FUNCTION__, kind); +} + +void parseFromString(ViewTarget::Kind & out, std::string_view str) +{ + for (auto kind : magic_enum::enum_values()) + { + if (toString(kind) == str) + { + out = kind; + return; + } + } + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}: Unexpected string {}", __FUNCTION__, str); +} + + +std::vector ASTViewTargets::getKinds() const +{ + std::vector kinds; + kinds.reserve(targets.size()); + for (const auto & target : targets) + kinds.push_back(target.kind); + return kinds; +} + + +void ASTViewTargets::setTableID(ViewTarget::Kind kind, const StorageID & table_id_) +{ + for (auto & target : targets) + { + if (target.kind == kind) + { + target.table_id = table_id_; + return; + } + } + if (table_id_) + targets.emplace_back(kind).table_id = table_id_; +} + +StorageID ASTViewTargets::getTableID(ViewTarget::Kind kind) const +{ + if (const auto * target = tryGetTarget(kind)) + return target->table_id; + return StorageID::createEmpty(); +} + +bool ASTViewTargets::hasTableID(ViewTarget::Kind kind) const +{ + if (const auto * target = tryGetTarget(kind)) + return !target->table_id.empty(); + return false; +} + +void ASTViewTargets::setCurrentDatabase(const String & current_database) +{ + for (auto & target : targets) + { + auto & table_id = target.table_id; + if (!table_id.table_name.empty() && table_id.database_name.empty()) + table_id.database_name = current_database; + } +} + +void ASTViewTargets::setInnerUUID(ViewTarget::Kind kind, const UUID & inner_uuid_) +{ + for (auto & target : targets) + { + if (target.kind == kind) + { + target.inner_uuid = inner_uuid_; + return; + } + } + if (inner_uuid_ != UUIDHelpers::Nil) + targets.emplace_back(kind).inner_uuid = inner_uuid_; +} + +UUID ASTViewTargets::getInnerUUID(ViewTarget::Kind kind) const +{ + if (const auto * target = tryGetTarget(kind)) + return target->inner_uuid; + return UUIDHelpers::Nil; +} + +bool ASTViewTargets::hasInnerUUID(ViewTarget::Kind kind) const +{ + return getInnerUUID(kind) != UUIDHelpers::Nil; +} + +void ASTViewTargets::resetInnerUUIDs() +{ + for (auto & target : targets) + target.inner_uuid = UUIDHelpers::Nil; +} + +bool ASTViewTargets::hasInnerUUIDs() const +{ + for (const auto & target : targets) + { + if (target.inner_uuid != UUIDHelpers::Nil) + return true; + } + return false; +} + +void ASTViewTargets::setInnerEngine(ViewTarget::Kind kind, ASTPtr storage_def) +{ + auto new_inner_engine = typeid_cast>(storage_def); + if (!new_inner_engine && storage_def) + throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Bad cast from type {} to ASTStorage", storage_def->getID()); + + for (auto & target : targets) + { + if (target.kind == kind) + { + if (target.inner_engine == new_inner_engine) + return; + if (new_inner_engine) + children.push_back(new_inner_engine); + if (target.inner_engine) + std::erase(children, target.inner_engine); + target.inner_engine = new_inner_engine; + return; + } + } + + if (new_inner_engine) + { + targets.emplace_back(kind).inner_engine = new_inner_engine; + children.push_back(new_inner_engine); + } +} + +std::shared_ptr ASTViewTargets::getInnerEngine(ViewTarget::Kind kind) const +{ + if (const auto * target = tryGetTarget(kind)) + return target->inner_engine; + return nullptr; +} + +std::vector> ASTViewTargets::getInnerEngines() const +{ + std::vector> res; + res.reserve(targets.size()); + for (const auto & target : targets) + { + if (target.inner_engine) + res.push_back(target.inner_engine); + } + return res; +} + +const ViewTarget * ASTViewTargets::tryGetTarget(ViewTarget::Kind kind) const +{ + for (const auto & target : targets) + { + if (target.kind == kind) + return ⌖ + } + return nullptr; +} + +ASTPtr ASTViewTargets::clone() const +{ + auto res = std::make_shared(*this); + res->children.clear(); + for (auto & target : res->targets) + { + if (target.inner_engine) + { + target.inner_engine = typeid_cast>(target.inner_engine->clone()); + res->children.push_back(target.inner_engine); + } + } + return res; +} + +void ASTViewTargets::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const +{ + for (const auto & target : targets) + formatTarget(target, s, state, frame); +} + +void ASTViewTargets::formatTarget(ViewTarget::Kind kind, const FormatSettings & s, FormatState & state, FormatStateStacked frame) const +{ + for (const auto & target : targets) + { + if (target.kind == kind) + formatTarget(target, s, state, frame); + } +} + +void ASTViewTargets::formatTarget(const ViewTarget & target, const FormatSettings & s, FormatState & state, FormatStateStacked frame) +{ + if (target.table_id) + { + auto keyword = getKeywordForTableID(target.kind); + if (!keyword) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No keyword for table name of kind {}", toString(target.kind)); + s.ostr << " " << (s.hilite ? hilite_keyword : "") << toStringView(*keyword) + << (s.hilite ? hilite_none : "") << " " + << (!target.table_id.database_name.empty() ? backQuoteIfNeed(target.table_id.database_name) + "." : "") + << backQuoteIfNeed(target.table_id.table_name); + } + + if (target.inner_uuid != UUIDHelpers::Nil) + { + auto keyword = getKeywordForInnerUUID(target.kind); + if (!keyword) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No prefix keyword for inner UUID of kind {}", toString(target.kind)); + s.ostr << " " << (s.hilite ? hilite_keyword : "") << toStringView(*keyword) + << (s.hilite ? hilite_none : "") << " " << quoteString(toString(target.inner_uuid)); + } + + if (target.inner_engine) + { + auto keyword = getKeywordForInnerStorage(target.kind); + if (!keyword) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No prefix keyword for table engine of kind {}", toString(target.kind)); + s.ostr << " " << (s.hilite ? hilite_keyword : "") << toStringView(*keyword) << (s.hilite ? hilite_none : ""); + target.inner_engine->formatImpl(s, state, frame); + } +} + +std::optional ASTViewTargets::getKeywordForTableID(ViewTarget::Kind kind) +{ + switch (kind) + { + case ViewTarget::To: return Keyword::TO; /// TO mydb.mydata + case ViewTarget::Inner: return std::nullopt; + } + UNREACHABLE(); +} + +std::optional ASTViewTargets::getKeywordForInnerStorage(ViewTarget::Kind kind) +{ + switch (kind) + { + case ViewTarget::To: return std::nullopt; /// ENGINE = MergeTree() + case ViewTarget::Inner: return Keyword::INNER; /// INNER ENGINE = MergeTree() + } + UNREACHABLE(); +} + +std::optional ASTViewTargets::getKeywordForInnerUUID(ViewTarget::Kind kind) +{ + switch (kind) + { + case ViewTarget::To: return Keyword::TO_INNER_UUID; /// TO INNER UUID 'XXX' + case ViewTarget::Inner: return std::nullopt; + } + UNREACHABLE(); +} + +void ASTViewTargets::forEachPointerToChild(std::function f) +{ + for (auto & target : targets) + { + if (target.inner_engine) + { + ASTStorage * new_inner_engine = target.inner_engine.get(); + f(reinterpret_cast(&new_inner_engine)); + if (new_inner_engine != target.inner_engine.get()) + { + if (new_inner_engine) + target.inner_engine = typeid_cast>(new_inner_engine->ptr()); + else + target.inner_engine.reset(); + } + } + } +} + +} diff --git a/src/Parsers/ASTViewTargets.h b/src/Parsers/ASTViewTargets.h new file mode 100644 index 00000000000..12182919f0e --- /dev/null +++ b/src/Parsers/ASTViewTargets.h @@ -0,0 +1,115 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class ASTStorage; +enum class Keyword : size_t; + +/// Information about target tables (external or inner) of a materialized view or a window view. +/// See ASTViewTargets for more details. +struct ViewTarget +{ + enum Kind + { + /// If `kind == ViewTarget::To` then `ViewTarget` contains information about the "TO" table of a materialized view or a window view: + /// CREATE MATERIALIZED VIEW db.mv_name {TO [db.]to_target | ENGINE to_engine} AS SELECT ... + /// or + /// CREATE WINDOW VIEW db.wv_name {TO [db.]to_target | ENGINE to_engine} AS SELECT ... + To, + + /// If `kind == ViewTarget::Inner` then `ViewTarget` contains information about the "INNER" table of a window view: + /// CREATE WINDOW VIEW db.wv_name {INNER ENGINE inner_engine} AS SELECT ... + Inner, + }; + + Kind kind = To; + + /// StorageID of the target table, if it's not inner. + /// That storage ID can be seen for example after "TO" in a statement like CREATE MATERIALIZED VIEW ... TO ... + StorageID table_id = StorageID::createEmpty(); + + /// UUID of the target table, if it's inner. + /// The UUID is calculated automatically and can be seen for example after "TO INNER UUID" in a statement like + /// CREATE MATERIALIZED VIEW ... TO INNER UUID ... + UUID inner_uuid = UUIDHelpers::Nil; + + /// Table engine of the target table, if it's inner. + /// That engine can be seen for example after "ENGINE" in a statement like CREATE MATERIALIZED VIEW ... ENGINE ... + std::shared_ptr inner_engine; +}; + +/// Converts ViewTarget::Kind to a string. +std::string_view toString(ViewTarget::Kind kind); +void parseFromString(ViewTarget::Kind & out, std::string_view str); + + +/// Information about all target tables (external or inner) of a view. +/// +/// For example, for a materialized view: +/// CREATE MATERIALIZED VIEW db.mv_name [TO [db.]to_target | ENGINE to_engine] AS SELECT ... +/// this class contains information about the "TO" table: its name and database (if it's external), its UUID and engine (if it's inner). +/// +/// For a window view: +/// CREATE WINDOW VIEW db.wv_name [TO [db.]to_target | ENGINE to_engine] [INNER ENGINE inner_engine] AS SELECT ... +/// this class contains information about both the "TO" table and the "INNER" table. +class ASTViewTargets : public IAST +{ +public: + std::vector targets; + + /// Sets the StorageID of the target table, if it's not inner. + /// That storage ID can be seen for example after "TO" in a statement like CREATE MATERIALIZED VIEW ... TO ... + void setTableID(ViewTarget::Kind kind, const StorageID & table_id_); + StorageID getTableID(ViewTarget::Kind kind) const; + bool hasTableID(ViewTarget::Kind kind) const; + + /// Replaces an empty database in the StorageID of the target table with a specified database. + void setCurrentDatabase(const String & current_database); + + /// Sets the UUID of the target table, if it's inner. + /// The UUID is calculated automatically and can be seen for example after "TO INNER UUID" in a statement like + /// CREATE MATERIALIZED VIEW ... TO INNER UUID ... + void setInnerUUID(ViewTarget::Kind kind, const UUID & inner_uuid_); + UUID getInnerUUID(ViewTarget::Kind kind) const; + bool hasInnerUUID(ViewTarget::Kind kind) const; + + void resetInnerUUIDs(); + bool hasInnerUUIDs() const; + + /// Sets the table engine of the target table, if it's inner. + /// That engine can be seen for example after "ENGINE" in a statement like CREATE MATERIALIZED VIEW ... ENGINE ... + void setInnerEngine(ViewTarget::Kind kind, ASTPtr storage_def); + std::shared_ptr getInnerEngine(ViewTarget::Kind kind) const; + std::vector> getInnerEngines() const; + + /// Returns a list of all kinds of views in this ASTViewTargets. + std::vector getKinds() const; + + /// Returns information about a target table. + /// The function returns null if such target doesn't exist. + const ViewTarget * tryGetTarget(ViewTarget::Kind kind) const; + + String getID(char) const override { return "ViewTargets"; } + + ASTPtr clone() const override; + + void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; + + /// Formats information only about a specific target table. + void formatTarget(ViewTarget::Kind kind, const FormatSettings & s, FormatState & state, FormatStateStacked frame) const; + static void formatTarget(const ViewTarget & target, const FormatSettings & s, FormatState & state, FormatStateStacked frame); + + /// Helper functions for class ParserViewTargets. Returns a prefix keyword matching a specified target kind. + static std::optional getKeywordForTableID(ViewTarget::Kind kind); + static std::optional getKeywordForInnerUUID(ViewTarget::Kind kind); + static std::optional getKeywordForInnerStorage(ViewTarget::Kind kind); + +protected: + void forEachPointerToChild(std::function f) override; +}; + +} diff --git a/src/Parsers/CreateQueryUUIDs.cpp b/src/Parsers/CreateQueryUUIDs.cpp new file mode 100644 index 00000000000..4dfee67b537 --- /dev/null +++ b/src/Parsers/CreateQueryUUIDs.cpp @@ -0,0 +1,168 @@ +#include + +#include +#include +#include +#include + + +namespace DB +{ + +CreateQueryUUIDs::CreateQueryUUIDs(const ASTCreateQuery & query, bool generate_random, bool force_random) +{ + if (!generate_random || !force_random) + { + uuid = query.uuid; + if (query.targets) + { + for (const auto & target : query.targets->targets) + setTargetInnerUUID(target.kind, target.inner_uuid); + } + } + + if (generate_random) + { + if (uuid == UUIDHelpers::Nil) + uuid = UUIDHelpers::generateV4(); + + /// For an ATTACH query we should never generate UUIDs for its inner target tables + /// because for an ATTACH query those inner target tables probably already exist and can be accessible by names. + /// If we generate random UUIDs for already existing tables then those UUIDs will not be correct making those inner target table inaccessible. + /// Thus it's not safe for example to replace + /// "ATTACH MATERIALIZED VIEW mv AS SELECT a FROM b" with + /// "ATTACH MATERIALIZED VIEW mv TO INNER UUID "XXXX" AS SELECT a FROM b" + /// This replacement is safe only for CREATE queries when inner target tables don't exist yet. + if (!query.attach) + { + auto generate_target_uuid = [&](ViewTarget::Kind target_kind) + { + if ((query.getTargetInnerUUID(target_kind) == UUIDHelpers::Nil) && query.getTargetTableID(target_kind).empty()) + setTargetInnerUUID(target_kind, UUIDHelpers::generateV4()); + }; + + /// If destination table (to_table_id) is not specified for materialized view, + /// then MV will create inner table. We should generate UUID of inner table here. + if (query.is_materialized_view) + generate_target_uuid(ViewTarget::To); + } + } +} + +bool CreateQueryUUIDs::empty() const +{ + if (uuid != UUIDHelpers::Nil) + return false; + for (const auto & [_, inner_uuid] : targets_inner_uuids) + { + if (inner_uuid != UUIDHelpers::Nil) + return false; + } + return true; +} + +String CreateQueryUUIDs::toString() const +{ + WriteBufferFromOwnString out; + out << "{"; + bool need_comma = false; + auto add_name_and_uuid_to_string = [&](std::string_view name_, const UUID & uuid_) + { + if (std::exchange(need_comma, true)) + out << ", "; + out << "\"" << name_ << "\": \"" << uuid_ << "\""; + }; + if (uuid != UUIDHelpers::Nil) + add_name_and_uuid_to_string("uuid", uuid); + for (const auto & [kind, inner_uuid] : targets_inner_uuids) + { + if (inner_uuid != UUIDHelpers::Nil) + add_name_and_uuid_to_string(::DB::toString(kind), inner_uuid); + } + out << "}"; + return out.str(); +} + +CreateQueryUUIDs CreateQueryUUIDs::fromString(const String & str) +{ + ReadBufferFromString in{str}; + CreateQueryUUIDs res; + skipWhitespaceIfAny(in); + in >> "{"; + skipWhitespaceIfAny(in); + char c; + while (in.peek(c) && c != '}') + { + String name; + String value; + readDoubleQuotedString(name, in); + skipWhitespaceIfAny(in); + in >> ":"; + skipWhitespaceIfAny(in); + readDoubleQuotedString(value, in); + skipWhitespaceIfAny(in); + if (name == "uuid") + { + res.uuid = parse(value); + } + else + { + ViewTarget::Kind kind; + parseFromString(kind, name); + res.setTargetInnerUUID(kind, parse(value)); + } + if (in.peek(c) && c == ',') + { + in.ignore(1); + skipWhitespaceIfAny(in); + } + } + in >> "}"; + return res; +} + +void CreateQueryUUIDs::setTargetInnerUUID(ViewTarget::Kind kind, const UUID & new_inner_uuid) +{ + for (auto & pair : targets_inner_uuids) + { + if (pair.first == kind) + { + pair.second = new_inner_uuid; + return; + } + } + if (new_inner_uuid != UUIDHelpers::Nil) + targets_inner_uuids.emplace_back(kind, new_inner_uuid); +} + +UUID CreateQueryUUIDs::getTargetInnerUUID(ViewTarget::Kind kind) const +{ + for (const auto & pair : targets_inner_uuids) + { + if (pair.first == kind) + return pair.second; + } + return UUIDHelpers::Nil; +} + +void CreateQueryUUIDs::copyToQuery(ASTCreateQuery & query) const +{ + query.uuid = uuid; + + if (query.targets) + query.targets->resetInnerUUIDs(); + + if (!targets_inner_uuids.empty()) + { + if (!query.targets) + query.set(query.targets, std::make_shared()); + + for (const auto & [kind, inner_uuid] : targets_inner_uuids) + { + if (inner_uuid != UUIDHelpers::Nil) + query.targets->setInnerUUID(kind, inner_uuid); + } + } +} + +} diff --git a/src/Parsers/CreateQueryUUIDs.h b/src/Parsers/CreateQueryUUIDs.h new file mode 100644 index 00000000000..419dad24b35 --- /dev/null +++ b/src/Parsers/CreateQueryUUIDs.h @@ -0,0 +1,40 @@ +#pragma once + +#include + + +namespace DB +{ +class ASTCreateQuery; + +/// The UUID of a table or a database defined with a CREATE QUERY along with the UUIDs of its inner targets. +struct CreateQueryUUIDs +{ + CreateQueryUUIDs() = default; + + /// Collect UUIDs from ASTCreateQuery. + /// Parameters: + /// `generate_random` - if it's true then unspecified in the query UUIDs will be generated randomly; + /// `force_random` - if it's true then all UUIDs (even specified in the query) will be (re)generated randomly. + explicit CreateQueryUUIDs(const ASTCreateQuery & query, bool generate_random = false, bool force_random = false); + + bool empty() const; + explicit operator bool() const { return !empty(); } + + String toString() const; + static CreateQueryUUIDs fromString(const String & str); + + void setTargetInnerUUID(ViewTarget::Kind kind, const UUID & new_inner_uuid); + UUID getTargetInnerUUID(ViewTarget::Kind kind) const; + + /// Copies UUIDs to ASTCreateQuery. + void copyToQuery(ASTCreateQuery & query) const; + + /// UUID of the table. + UUID uuid = UUIDHelpers::Nil; + + /// UUIDs of its target table (or tables). + std::vector> targets_inner_uuids; +}; + +} diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index d4fc9a4bc4d..865d07faaa7 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -9,8 +9,8 @@ #include #include #include -#include "Parsers/CommonParsers.h" +#include #include #include #include @@ -725,7 +725,6 @@ bool ParserStatisticsType::parseImpl(Pos & pos, ASTPtr & node, Expected & expect function_node->name = "STATISTICS"; function_node->arguments = stat_type; function_node->children.push_back(function_node->arguments); - node = function_node; return true; } diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 4e3f29bfe0c..0209e785bff 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -9,7 +9,7 @@ namespace DB { -/** The SELECT subquery is in parenthesis. +/** The SELECT subquery, in parentheses. */ class ParserSubquery : public IParserBase { diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index fff8383e7b3..d38dc6d5f37 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -2388,6 +2388,24 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } +bool ParserExpressionWithOptionalArguments::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserIdentifier id_p; + ParserFunction func_p; + + if (ParserFunction(false, false).parse(pos, node, expected)) + return true; + + if (ParserIdentifier().parse(pos, node, expected)) + { + node = makeASTFunction(node->as()->name()); + node->as().no_empty_args = true; + return true; + } + + return false; +} + const std::vector> ParserExpressionImpl::operators_table { {"->", Operator("lambda", 1, 2, OperatorType::Lambda)}, @@ -2743,7 +2761,7 @@ Action ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & po /// 'AND' can be both boolean function and part of the '... BETWEEN ... AND ...' operator if (op.function_name == "and" && layers.back()->between_counter) { - layers.back()->between_counter--; + --layers.back()->between_counter; op = finish_between_operator; } diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 235d5782630..6ab38416f32 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -144,6 +144,16 @@ protected: }; +/** Similar to ParserFunction (and yields ASTFunction), but can also parse identifiers without braces. + */ +class ParserExpressionWithOptionalArguments : public IParserBase +{ +protected: + const char * getName() const override { return "expression with optional parameters"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + /** An expression with an infix binary left-associative operator. * For example, a + b - c + d. */ diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index ee70fed0f07..d70c1cd0b6c 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -66,7 +66,7 @@ public: /** Set the alias. */ virtual void setAlias(const String & /*to*/) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't set alias of {}", getColumnName()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't set alias of {} of {}", getColumnName(), getID()); } /** Get the text that identifies this element. */ @@ -271,16 +271,15 @@ public: throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown element in AST: {}", getID()); } - // Secrets are displayed regarding show_secrets, then SensitiveDataMasker is applied. - // You can use Interpreters/formatWithPossiblyHidingSecrets.h for convenience. + /// Secrets are displayed regarding show_secrets, then SensitiveDataMasker is applied. + /// You can use Interpreters/formatWithPossiblyHidingSecrets.h for convenience. String formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets) const; - /* - * formatForLogging and formatForErrorMessage always hide secrets. This inconsistent - * behaviour is due to the fact such functions are called from Client which knows nothing about - * access rights and settings. Moreover, the only use case for displaying secrets are backups, - * and backup tools use only direct input and ignore logs and error messages. - */ + /** formatForLogging and formatForErrorMessage always hide secrets. This inconsistent + * behaviour is due to the fact such functions are called from Client which knows nothing about + * access rights and settings. Moreover, the only use case for displaying secrets are backups, + * and backup tools use only direct input and ignore logs and error messages. + */ String formatForLogging(size_t max_length = 0) const { return formatWithPossiblyHidingSensitiveData(max_length, true, false); diff --git a/src/Parsers/MySQL/tests/gtest_column_parser.cpp b/src/Parsers/MySQL/tests/gtest_column_parser.cpp index 21c37e4ee2e..3a9a0690f06 100644 --- a/src/Parsers/MySQL/tests/gtest_column_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_column_parser.cpp @@ -1,13 +1,14 @@ #include #include #include -#include +#include #include #include #include #include #include + using namespace DB; using namespace DB::MySQLParser; @@ -19,8 +20,8 @@ TEST(ParserColumn, AllNonGeneratedColumnOption) "COLUMN_FORMAT FIXED STORAGE MEMORY REFERENCES tbl_name (col_01) CHECK 1"; ASTPtr ast = parseQuery(p_column, input.data(), input.data() + input.size(), "", 0, 0, 0); EXPECT_EQ(ast->as()->name, "col_01"); - EXPECT_EQ(ast->as()->data_type->as()->name, "VARCHAR"); - EXPECT_EQ(ast->as()->data_type->as()->arguments->children[0]->as()->value.safeGet(), 100); + EXPECT_EQ(ast->as()->data_type->as()->name, "VARCHAR"); + EXPECT_EQ(ast->as()->data_type->as()->arguments->children[0]->as()->value.safeGet(), 100); ASTDeclareOptions * declare_options = ast->as()->column_options->as(); EXPECT_EQ(declare_options->changes["is_null"]->as()->value.safeGet(), 0); @@ -44,8 +45,8 @@ TEST(ParserColumn, AllGeneratedColumnOption) "REFERENCES tbl_name (col_01) CHECK 1 GENERATED ALWAYS AS (1) STORED"; ASTPtr ast = parseQuery(p_column, input.data(), input.data() + input.size(), "", 0, 0, 0); EXPECT_EQ(ast->as()->name, "col_01"); - EXPECT_EQ(ast->as()->data_type->as()->name, "VARCHAR"); - EXPECT_EQ(ast->as()->data_type->as()->arguments->children[0]->as()->value.safeGet(), 100); + EXPECT_EQ(ast->as()->data_type->as()->name, "VARCHAR"); + EXPECT_EQ(ast->as()->data_type->as()->arguments->children[0]->as()->value.safeGet(), 100); ASTDeclareOptions * declare_options = ast->as()->column_options->as(); EXPECT_EQ(declare_options->changes["is_null"]->as()->value.safeGet(), 1); diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index 28dbf781011..dbefb0cb966 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -9,8 +9,6 @@ #include #include #include -#include -#include #include #include #include diff --git a/src/Parsers/ParserCreateIndexQuery.cpp b/src/Parsers/ParserCreateIndexQuery.cpp index 2fa34696c58..9ebee4cc852 100644 --- a/src/Parsers/ParserCreateIndexQuery.cpp +++ b/src/Parsers/ParserCreateIndexQuery.cpp @@ -7,9 +7,9 @@ #include #include #include -#include #include + namespace DB { @@ -21,7 +21,7 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected ParserToken close_p(TokenType::ClosingRoundBracket); ParserOrderByExpressionList order_list_p; - ParserDataType data_type_p; + ParserExpressionWithOptionalArguments type_p; ParserExpression expression_p; ParserUnsignedInteger granularity_p; @@ -68,7 +68,7 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected if (s_type.ignore(pos, expected)) { - if (!data_type_p.parse(pos, type, expected)) + if (!type_p.parse(pos, type, expected)) return false; } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 014dc7bd3bf..a592975613b 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -22,6 +23,7 @@ #include #include #include +#include #include #include @@ -75,9 +77,9 @@ bool ParserNestedTable::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!close.ignore(pos, expected)) return false; - auto func = std::make_shared(); + auto func = std::make_shared(); tryGetIdentifierNameInto(name, func->name); - // FIXME(ilezhankin): func->no_empty_args = true; ? + func->arguments = columns; func->children.push_back(columns); node = func; @@ -178,7 +180,7 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe ParserKeyword s_granularity(Keyword::GRANULARITY); ParserIdentifier name_p; - ParserDataType data_type_p; + ParserExpressionWithOptionalArguments type_p; ParserExpression expression_p; ParserUnsignedInteger granularity_p; @@ -196,7 +198,7 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (!s_type.ignore(pos, expected)) return false; - if (!data_type_p.parse(pos, type, expected)) + if (!type_p.parse(pos, type, expected)) return false; if (s_granularity.ignore(pos, expected)) @@ -230,7 +232,7 @@ bool ParserStatisticsDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & ParserKeyword s_type(Keyword::TYPE); ParserList columns_p(std::make_unique(), std::make_unique(TokenType::Comma), false); - ParserList types_p(std::make_unique(), std::make_unique(TokenType::Comma), false); + ParserList types_p(std::make_unique(), std::make_unique(TokenType::Comma), false); ASTPtr columns; ASTPtr types; @@ -693,7 +695,8 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe ASTPtr table; ASTPtr columns_list; - ASTPtr storage; + std::shared_ptr storage; + ASTPtr targets; ASTPtr as_database; ASTPtr as_table; ASTPtr as_table_function; @@ -749,7 +752,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe auto * table_id = table->as(); - // Shortcut for ATTACH a previously detached table + /// A shortcut for ATTACH a previously detached table. bool short_attach = attach && !from_path; if (short_attach && (!pos.isValid() || pos.get().type == TokenType::Semicolon)) { @@ -773,6 +776,17 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe return true; } + auto parse_storage = [&] + { + chassert(!storage); + ASTPtr ast; + if (!storage_p.parse(pos, ast, expected)) + return false; + + storage = typeid_cast>(ast); + return true; + }; + auto need_parse_as_select = [&is_create_empty, &pos, &expected]() { if (ParserKeyword{Keyword::EMPTY_AS}.ignore(pos, expected)) @@ -798,7 +812,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (!s_rparen.ignore(pos, expected)) return false; - auto storage_parse_result = storage_p.parse(pos, storage, expected); + auto storage_parse_result = parse_storage(); if ((storage_parse_result || is_temporary) && need_parse_as_select()) { @@ -820,7 +834,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe */ else { - storage_p.parse(pos, storage, expected); + parse_storage(); /// CREATE|ATTACH TABLE ... AS ... if (need_parse_as_select()) @@ -843,7 +857,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe /// Optional - ENGINE can be specified. if (!storage) - storage_p.parse(pos, storage, expected); + parse_storage(); } } } @@ -904,6 +918,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe tryGetIdentifierNameInto(as_database, query->as_database); tryGetIdentifierNameInto(as_table, query->as_table); query->set(query->select, select); + query->set(query->targets, targets); query->is_create_empty = is_create_empty; if (from_path) @@ -977,6 +992,13 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e return false; } + std::shared_ptr targets; + if (to_table) + { + targets = std::make_shared(); + targets->setTableID(ViewTarget::To, to_table->as()->getTableId()); + } + /// Optional - a list of columns can be specified. It must fully comply with SELECT. if (s_lparen.ignore(pos, expected)) { @@ -1017,14 +1039,12 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e if (query->table) query->children.push_back(query->table); - if (to_table) - query->to_table_id = to_table->as()->getTableId(); - query->set(query->columns_list, columns_list); tryGetIdentifierNameInto(as_database, query->as_database); tryGetIdentifierNameInto(as_table, query->as_table); query->set(query->select, select); + query->set(query->targets, targets); if (comment) query->set(query->comment, comment); @@ -1139,6 +1159,18 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & storage_p.parse(pos, storage, expected); } + std::shared_ptr targets; + if (to_table || storage || inner_storage) + { + targets = std::make_shared(); + if (to_table) + targets->setTableID(ViewTarget::To, to_table->as()->getTableId()); + if (storage) + targets->setInnerEngine(ViewTarget::To, storage); + if (inner_storage) + targets->setInnerEngine(ViewTarget::Inner, inner_storage); + } + // WATERMARK if (s_watermark.ignore(pos, expected)) { @@ -1176,6 +1208,7 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & if (!select_p.parse(pos, select, expected)) return false; + auto comment = parseComment(pos, expected); auto query = std::make_shared(); node = query; @@ -1194,13 +1227,11 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & query->children.push_back(query->database); if (query->table) query->children.push_back(query->table); - - if (to_table) - query->to_table_id = to_table->as()->getTableId(); + if (comment) + query->set(query->comment, comment); query->set(query->columns_list, columns_list); - query->set(query->storage, storage); - query->set(query->inner_storage, inner_storage); + query->is_watermark_strictly_ascending = is_watermark_strictly_ascending; query->is_watermark_ascending = is_watermark_ascending; query->is_watermark_bounded = is_watermark_bounded; @@ -1213,6 +1244,7 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & tryGetIdentifierNameInto(as_database, query->as_database); tryGetIdentifierNameInto(as_table, query->as_table); query->set(query->select, select); + query->set(query->targets, targets); return true; } @@ -1436,6 +1468,7 @@ bool ParserCreateDatabaseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e return true; } + bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_create(Keyword::CREATE); @@ -1622,13 +1655,8 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (query->table) query->children.push_back(query->table); - if (to_table) - query->to_table_id = to_table->as()->getTableId(); - if (to_inner_uuid) - query->to_inner_uuid = parseFromString(to_inner_uuid->as()->value.get()); - query->set(query->columns_list, columns_list); - query->set(query->storage, storage); + if (refresh_strategy) query->set(query->refresh_strategy, refresh_strategy); if (comment) @@ -1639,29 +1667,41 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (query->columns_list && query->columns_list->primary_key) { /// If engine is not set will use default one - if (!query->storage) - query->set(query->storage, std::make_shared()); - else if (query->storage->primary_key) + if (!storage) + storage = std::make_shared(); + auto & storage_ref = typeid_cast(*storage); + if (storage_ref.primary_key) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed."); - - query->storage->primary_key = query->columns_list->primary_key; - + storage_ref.primary_key = query->columns_list->primary_key; } if (query->columns_list && (query->columns_list->primary_key_from_columns)) { /// If engine is not set will use default one - if (!query->storage) - query->set(query->storage, std::make_shared()); - else if (query->storage->primary_key) + if (!storage) + storage = std::make_shared(); + auto & storage_ref = typeid_cast(*storage); + if (storage_ref.primary_key) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed."); + storage_ref.primary_key = query->columns_list->primary_key_from_columns; + } - query->storage->primary_key = query->columns_list->primary_key_from_columns; + std::shared_ptr targets; + if (to_table || to_inner_uuid || storage) + { + targets = std::make_shared(); + if (to_table) + targets->setTableID(ViewTarget::To, to_table->as()->getTableId()); + if (to_inner_uuid) + targets->setInnerUUID(ViewTarget::To, parseFromString(to_inner_uuid->as()->value.safeGet())); + if (storage) + targets->setInnerEngine(ViewTarget::To, storage); } tryGetIdentifierNameInto(as_database, query->as_database); tryGetIdentifierNameInto(as_table, query->as_table); query->set(query->select, select); + query->set(query->targets, targets); return true; } diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index bb37491a366..53a62deb22b 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -13,6 +14,7 @@ #include #include + namespace DB { @@ -101,17 +103,15 @@ class IParserColumnDeclaration : public IParserBase { public: explicit IParserColumnDeclaration(bool require_type_ = true, bool allow_null_modifiers_ = false, bool check_keywords_after_name_ = false) - : require_type(require_type_) - , allow_null_modifiers(allow_null_modifiers_) - , check_keywords_after_name(check_keywords_after_name_) + : require_type(require_type_) + , allow_null_modifiers(allow_null_modifiers_) + , check_keywords_after_name(check_keywords_after_name_) { } void enableCheckTypeKeyword() { check_type_keyword = true; } protected: - using ASTDeclarePtr = std::shared_ptr; - const char * getName() const override{ return "column declaration"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; @@ -270,9 +270,8 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E auto default_function = std::make_shared(); default_function->name = "defaultValueOfTypeName"; default_function->arguments = std::make_shared(); - // Ephemeral columns don't really have secrets but we need to format - // into a String, hence the strange call - default_function->arguments->children.emplace_back(std::make_shared(type->as()->formatForLogging())); + /// Ephemeral columns don't really have secrets but we need to format into a String, hence the strange call + default_function->arguments->children.emplace_back(std::make_shared(type->as()->formatForLogging())); default_expression = default_function; } diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index ad33c7e4558..2edb0141e12 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -1,8 +1,8 @@ #include #include +#include #include -#include #include #include #include @@ -46,48 +46,6 @@ private: } }; -/// Wrapper to allow mixed lists of nested and normal types. -/// Parameters are either: -/// - Nested table elements; -/// - Enum element in form of 'a' = 1; -/// - literal; -/// - Dynamic type arguments; -/// - another data type (or identifier); -class ParserDataTypeArgument : public IParserBase -{ -public: - explicit ParserDataTypeArgument(std::string_view type_name_) : type_name(type_name_) - { - } - -private: - const char * getName() const override { return "data type argument"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override - { - if (type_name == "Dynamic") - { - DynamicArgumentsParser parser; - return parser.parse(pos, node, expected); - } - - ParserNestedTable nested_parser; - ParserDataType data_type_parser; - ParserAllCollectionsOfLiterals literal_parser(false); - - const char * operators[] = {"=", "equals", nullptr}; - ParserLeftAssociativeBinaryOperatorList enum_parser(operators, std::make_unique()); - - if (pos->type == TokenType::BareWord && std::string_view(pos->begin, pos->size()) == "Nested") - return nested_parser.parse(pos, node, expected); - - return enum_parser.parse(pos, node, expected) - || literal_parser.parse(pos, node, expected) - || data_type_parser.parse(pos, node, expected); - } - - std::string_view type_name; -}; - } bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) @@ -198,23 +156,102 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } - auto function_node = std::make_shared(); - function_node->name = type_name; - function_node->no_empty_args = true; + auto data_type_node = std::make_shared(); + data_type_node->name = type_name; if (pos->type != TokenType::OpeningRoundBracket) { - node = function_node; + node = data_type_node; return true; } ++pos; /// Parse optional parameters - ParserList args_parser(std::make_unique(type_name), std::make_unique(TokenType::Comma)); - ASTPtr expr_list_args; + ASTPtr expr_list_args = std::make_shared(); + + /// Allow mixed lists of nested and normal types. + /// Parameters are either: + /// - Nested table elements; + /// - Enum element in form of 'a' = 1; + /// - literal; + /// - Dynamic type arguments; + /// - another data type (or identifier); + + size_t arg_num = 0; + bool have_version_of_aggregate_function = false; + while (true) + { + if (arg_num > 0) + { + if (pos->type == TokenType::Comma) + ++pos; + else + break; + } + + ASTPtr arg; + if (type_name == "Dynamic") + { + DynamicArgumentsParser parser; + parser.parse(pos, arg, expected); + } + else if (type_name == "Nested") + { + ParserNestedTable nested_parser; + nested_parser.parse(pos, arg, expected); + } + else if (type_name == "AggregateFunction" || type_name == "SimpleAggregateFunction") + { + /// This is less trivial. + /// The first optional argument for AggregateFunction is a numeric literal, defining the version. + /// The next argument is the function name, optionally with parameters. + /// Subsequent arguments are data types. + + if (arg_num == 0 && type_name == "AggregateFunction") + { + ParserUnsignedInteger version_parser; + if (version_parser.parse(pos, arg, expected)) + { + have_version_of_aggregate_function = true; + expr_list_args->children.emplace_back(std::move(arg)); + ++arg_num; + continue; + } + } + + if (arg_num == (have_version_of_aggregate_function ? 1 : 0)) + { + ParserFunction function_parser; + ParserIdentifier identifier_parser; + function_parser.parse(pos, arg, expected) + || identifier_parser.parse(pos, arg, expected); + } + else + { + ParserDataType data_type_parser; + data_type_parser.parse(pos, arg, expected); + } + } + else + { + ParserDataType data_type_parser; + ParserAllCollectionsOfLiterals literal_parser(false); + + const char * operators[] = {"=", "equals", nullptr}; + ParserLeftAssociativeBinaryOperatorList enum_parser(operators, std::make_unique()); + + enum_parser.parse(pos, arg, expected) + || literal_parser.parse(pos, arg, expected) + || data_type_parser.parse(pos, arg, expected); + } + + if (!arg) + break; + + expr_list_args->children.emplace_back(std::move(arg)); + ++arg_num; + } - if (!args_parser.parse(pos, expr_list_args, expected)) - return false; if (pos->type == TokenType::Comma) // ignore trailing comma inside Nested structures like Tuple(Int, Tuple(Int, String),) ++pos; @@ -222,10 +259,10 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; ++pos; - function_node->arguments = expr_list_args; - function_node->children.push_back(function_node->arguments); + data_type_node->arguments = expr_list_args; + data_type_node->children.push_back(data_type_node->arguments); - node = function_node; + node = data_type_node; return true; } diff --git a/src/Parsers/ParserDescribeTableQuery.cpp b/src/Parsers/ParserDescribeTableQuery.cpp index 92c0cfacd9b..22bbfdb03e1 100644 --- a/src/Parsers/ParserDescribeTableQuery.cpp +++ b/src/Parsers/ParserDescribeTableQuery.cpp @@ -11,15 +11,12 @@ namespace DB { - bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_describe(Keyword::DESCRIBE); ParserKeyword s_desc(Keyword::DESC); ParserKeyword s_table(Keyword::TABLE); ParserKeyword s_settings(Keyword::SETTINGS); - ParserToken s_dot(TokenType::Dot); - ParserIdentifier name_p; ParserSetQuery parser_settings(true); ASTPtr database; @@ -53,5 +50,4 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex return true; } - } diff --git a/src/Parsers/ParserViewTargets.cpp b/src/Parsers/ParserViewTargets.cpp new file mode 100644 index 00000000000..8f010882cdd --- /dev/null +++ b/src/Parsers/ParserViewTargets.cpp @@ -0,0 +1,88 @@ +#include + +#include +#include +#include +#include +#include + + +namespace DB +{ + +ParserViewTargets::ParserViewTargets() +{ + for (auto kind : magic_enum::enum_values()) + accept_kinds.push_back(kind); +} + +bool ParserViewTargets::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserStringLiteral literal_p; + ParserStorage storage_p{ParserStorage::TABLE_ENGINE}; + ParserCompoundIdentifier table_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ true); + + std::shared_ptr res; + + auto result = [&] -> ASTViewTargets & + { + if (!res) + res = std::make_shared(); + return *res; + }; + + for (;;) + { + auto start = pos; + for (auto kind : accept_kinds) + { + auto current = pos; + + auto keyword = ASTViewTargets::getKeywordForInnerUUID(kind); + if (keyword && ParserKeyword{*keyword}.ignore(pos, expected)) + { + ASTPtr ast; + if (literal_p.parse(pos, ast, expected)) + { + result().setInnerUUID(kind, parseFromString(ast->as()->value.safeGet())); + break; + } + } + pos = current; + + keyword = ASTViewTargets::getKeywordForInnerStorage(kind); + if (keyword && ParserKeyword{*keyword}.ignore(pos, expected)) + { + ASTPtr ast; + if (storage_p.parse(pos, ast, expected)) + { + result().setInnerEngine(kind, ast); + break; + } + } + pos = current; + + keyword = ASTViewTargets::getKeywordForTableID(kind); + if (keyword && ParserKeyword{*keyword}.ignore(pos, expected)) + { + ASTPtr ast; + if (table_name_p.parse(pos, ast, expected)) + { + result().setTableID(kind, ast->as()->getTableId()); + break; + } + } + pos = current; + } + if (pos == start) + break; + } + + if (!res || res->targets.empty()) + return false; + + node = res; + return true; +} + +} diff --git a/src/Parsers/ParserViewTargets.h b/src/Parsers/ParserViewTargets.h new file mode 100644 index 00000000000..3af3c0b8df3 --- /dev/null +++ b/src/Parsers/ParserViewTargets.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +/// Parses information about target tables (external or inner) of a materialized view or a window view. +/// The function parses one or multiple parts of a CREATE query looking like this: +/// TO db.table_name +/// TO INNER UUID 'XXX' +/// {ENGINE / INNER ENGINE} TableEngine(arguments) [ORDER BY ...] [SETTINGS ...] +/// Returns ASTViewTargets if succeeded. +class ParserViewTargets : public IParserBase +{ +public: + ParserViewTargets(); + explicit ParserViewTargets(const std::vector & accept_kinds_) : accept_kinds(accept_kinds_) { } + +protected: + const char * getName() const override { return "ViewTargets"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + + std::vector accept_kinds; +}; + +} diff --git a/src/Parsers/examples/CMakeLists.txt b/src/Parsers/examples/CMakeLists.txt index 261f234081c..07f60601acd 100644 --- a/src/Parsers/examples/CMakeLists.txt +++ b/src/Parsers/examples/CMakeLists.txt @@ -1,7 +1,7 @@ set(SRCS) clickhouse_add_executable(lexer lexer.cpp ${SRCS}) -target_link_libraries(lexer PRIVATE clickhouse_parsers) +target_link_libraries(lexer PRIVATE clickhouse_parsers clickhouse_common_config) clickhouse_add_executable(select_parser select_parser.cpp ${SRCS} "../../Server/ServerType.cpp") target_link_libraries(select_parser PRIVATE dbms) diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp index d5e39a9f123..2fe62aa9be0 100644 --- a/src/Planner/CollectTableExpressionData.cpp +++ b/src/Planner/CollectTableExpressionData.cpp @@ -88,16 +88,16 @@ public: auto column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(node); - ActionsDAGPtr alias_column_actions_dag = std::make_shared(); + ActionsDAG alias_column_actions_dag; PlannerActionsVisitor actions_visitor(planner_context, false); - auto outputs = actions_visitor.visit(*alias_column_actions_dag, column_node->getExpression()); + auto outputs = actions_visitor.visit(alias_column_actions_dag, column_node->getExpression()); if (outputs.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected single output in actions dag for alias column {}. Actual {}", column_node->dumpTree(), outputs.size()); const auto & column_name = column_node->getColumnName(); - const auto & alias_node = alias_column_actions_dag->addAlias(*outputs[0], column_name); - alias_column_actions_dag->addOrReplaceInOutputs(alias_node); - table_expression_data.addAliasColumn(column_node->getColumn(), column_identifier, alias_column_actions_dag, select_added_columns); + const auto & alias_node = alias_column_actions_dag.addAlias(*outputs[0], column_name); + alias_column_actions_dag.addOrReplaceInOutputs(alias_node); + table_expression_data.addAliasColumn(column_node->getColumn(), column_identifier, std::move(alias_column_actions_dag), select_added_columns); } return; @@ -335,22 +335,22 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr collect_source_columns_visitor.setKeepAliasColumns(false); collect_source_columns_visitor.visit(query_node_typed.getPrewhere()); - auto prewhere_actions_dag = std::make_shared(); + ActionsDAG prewhere_actions_dag; QueryTreeNodePtr query_tree_node = query_node_typed.getPrewhere(); PlannerActionsVisitor visitor(planner_context, false /*use_column_identifier_as_action_node_name*/); - auto expression_nodes = visitor.visit(*prewhere_actions_dag, query_tree_node); + auto expression_nodes = visitor.visit(prewhere_actions_dag, query_tree_node); if (expression_nodes.size() != 1) throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "Invalid PREWHERE. Expected single boolean expression. In query {}", query_node->formatASTForErrorMessage()); - prewhere_actions_dag->getOutputs().push_back(expression_nodes.back()); + prewhere_actions_dag.getOutputs().push_back(expression_nodes.back()); - for (const auto & prewhere_input_node : prewhere_actions_dag->getInputs()) + for (const auto & prewhere_input_node : prewhere_actions_dag.getInputs()) if (required_column_names_without_prewhere.contains(prewhere_input_node->result_name)) - prewhere_actions_dag->getOutputs().push_back(prewhere_input_node); + prewhere_actions_dag.getOutputs().push_back(prewhere_input_node); table_expression_data.setPrewhereFilterActions(std::move(prewhere_actions_dag)); } diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index a35ba69d459..968642dc9de 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -12,7 +12,6 @@ #include #include -#include #include #include @@ -216,9 +215,11 @@ FiltersForTableExpressionMap collectFiltersForAnalysis(const QueryTreeNodePtr & if (!read_from_dummy) continue; - auto filter_actions = read_from_dummy->getFilterActionsDAG(); - const auto & table_node = dummy_storage_to_table.at(&read_from_dummy->getStorage()); - res[table_node] = FiltersForTableExpression{std::move(filter_actions), read_from_dummy->getPrewhereInfo()}; + if (auto filter_actions = read_from_dummy->detachFilterActionsDAG()) + { + const auto & table_node = dummy_storage_to_table.at(&read_from_dummy->getStorage()); + res[table_node] = FiltersForTableExpression{std::move(filter_actions), read_from_dummy->getPrewhereInfo()}; + } } return res; @@ -332,34 +333,34 @@ public: }; void addExpressionStep(QueryPlan & query_plan, - const ActionsAndProjectInputsFlagPtr & expression_actions, + ActionsAndProjectInputsFlagPtr & expression_actions, const std::string & step_description, - std::vector & result_actions_to_execute) + UsefulSets & useful_sets) { - auto actions = expression_actions->dag.clone(); + auto actions = std::move(expression_actions->dag); if (expression_actions->project_input) - actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + actions.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); - result_actions_to_execute.push_back(actions); - auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), actions); + auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(actions)); + appendSetsFromActionsDAG(expression_step->getExpression(), useful_sets); expression_step->setStepDescription(step_description); query_plan.addStep(std::move(expression_step)); } void addFilterStep(QueryPlan & query_plan, - const FilterAnalysisResult & filter_analysis_result, + FilterAnalysisResult & filter_analysis_result, const std::string & step_description, - std::vector & result_actions_to_execute) + UsefulSets & useful_sets) { - auto actions = filter_analysis_result.filter_actions->dag.clone(); + auto actions = std::move(filter_analysis_result.filter_actions->dag); if (filter_analysis_result.filter_actions->project_input) - actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + actions.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); - result_actions_to_execute.push_back(actions); auto where_step = std::make_unique(query_plan.getCurrentDataStream(), - actions, + std::move(actions), filter_analysis_result.filter_column_name, filter_analysis_result.remove_filter_column); + appendSetsFromActionsDAG(where_step->getExpression(), useful_sets); where_step->setStepDescription(step_description); query_plan.addStep(std::move(where_step)); } @@ -543,39 +544,41 @@ void addMergingAggregatedStep(QueryPlan & query_plan, } void addTotalsHavingStep(QueryPlan & query_plan, - const PlannerExpressionsAnalysisResult & expression_analysis_result, + PlannerExpressionsAnalysisResult & expression_analysis_result, const QueryAnalysisResult & query_analysis_result, const PlannerContextPtr & planner_context, const QueryNode & query_node, - std::vector & result_actions_to_execute) + UsefulSets & useful_sets) { const auto & query_context = planner_context->getQueryContext(); const auto & settings = query_context->getSettingsRef(); - const auto & aggregation_analysis_result = expression_analysis_result.getAggregation(); - const auto & having_analysis_result = expression_analysis_result.getHaving(); + auto & aggregation_analysis_result = expression_analysis_result.getAggregation(); + auto & having_analysis_result = expression_analysis_result.getHaving(); bool need_finalize = !query_node.isGroupByWithRollup() && !query_node.isGroupByWithCube(); - ActionsDAGPtr actions; + std::optional actions; if (having_analysis_result.filter_actions) { - actions = having_analysis_result.filter_actions->dag.clone(); + actions = std::move(having_analysis_result.filter_actions->dag); if (having_analysis_result.filter_actions->project_input) actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); - - result_actions_to_execute.push_back(actions); } auto totals_having_step = std::make_unique( query_plan.getCurrentDataStream(), aggregation_analysis_result.aggregate_descriptions, query_analysis_result.aggregate_overflow_row, - actions, + std::move(actions), having_analysis_result.filter_column_name, having_analysis_result.remove_filter_column, settings.totals_mode, settings.totals_auto_threshold, need_finalize); + + if (having_analysis_result.filter_actions) + appendSetsFromActionsDAG(*totals_having_step->getActions(), useful_sets); + query_plan.addStep(std::move(totals_having_step)); } @@ -717,13 +720,13 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan, if (query_node.hasInterpolate()) { - auto interpolate_actions_dag = std::make_shared(); + ActionsDAG interpolate_actions_dag; auto query_plan_columns = query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); for (auto & query_plan_column : query_plan_columns) { /// INTERPOLATE actions dag input columns must be non constant query_plan_column.column = nullptr; - interpolate_actions_dag->addInput(query_plan_column); + interpolate_actions_dag.addInput(query_plan_column); } auto & interpolate_list_node = query_node.getInterpolate()->as(); @@ -731,12 +734,12 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan, if (interpolate_list_nodes.empty()) { - for (const auto * input_node : interpolate_actions_dag->getInputs()) + for (const auto * input_node : interpolate_actions_dag.getInputs()) { if (column_names_with_fill.contains(input_node->result_name)) continue; - interpolate_actions_dag->getOutputs().push_back(input_node); + interpolate_actions_dag.getOutputs().push_back(input_node); } } else @@ -745,18 +748,13 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan, { auto & interpolate_node_typed = interpolate_node->as(); - PlannerActionsVisitor planner_actions_visitor( - planner_context, - /* use_column_identifier_as_action_node_name_, (default value)*/ true, - /// Prefer the INPUT to CONSTANT nodes (actions must be non constant) - /* always_use_const_column_for_constant_nodes */ false); - - auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(*interpolate_actions_dag, + PlannerActionsVisitor planner_actions_visitor(planner_context); + auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getExpression()); if (expression_to_interpolate_expression_nodes.size() != 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression to interpolate expected to have single action node"); - auto interpolate_expression_nodes = planner_actions_visitor.visit(*interpolate_actions_dag, + auto interpolate_expression_nodes = planner_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getInterpolateExpression()); if (interpolate_expression_nodes.size() != 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Interpolate expression expected to have single action node"); @@ -767,16 +765,16 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan, const auto * interpolate_expression = interpolate_expression_nodes[0]; if (!interpolate_expression->result_type->equals(*expression_to_interpolate->result_type)) { - interpolate_expression = &interpolate_actions_dag->addCast(*interpolate_expression, + interpolate_expression = &interpolate_actions_dag.addCast(*interpolate_expression, expression_to_interpolate->result_type, interpolate_expression->result_name); } - const auto * alias_node = &interpolate_actions_dag->addAlias(*interpolate_expression, expression_to_interpolate_name); - interpolate_actions_dag->getOutputs().push_back(alias_node); + const auto * alias_node = &interpolate_actions_dag.addAlias(*interpolate_expression, expression_to_interpolate_name); + interpolate_actions_dag.getOutputs().push_back(alias_node); } - interpolate_actions_dag->removeUnusedActions(); + interpolate_actions_dag.removeUnusedActions(); } Aliases empty_aliases; @@ -888,12 +886,12 @@ bool addPreliminaryLimitOptimizationStepIfNeeded(QueryPlan & query_plan, * WINDOW functions. */ void addPreliminarySortOrDistinctOrLimitStepsIfNeeded(QueryPlan & query_plan, - const PlannerExpressionsAnalysisResult & expressions_analysis_result, + PlannerExpressionsAnalysisResult & expressions_analysis_result, const QueryAnalysisResult & query_analysis_result, const PlannerContextPtr & planner_context, const PlannerQueryProcessingInfo & query_processing_info, const QueryTreeNodePtr & query_tree, - std::vector & result_actions_to_execute) + UsefulSets & useful_sets) { const auto & query_node = query_tree->as(); @@ -924,8 +922,8 @@ void addPreliminarySortOrDistinctOrLimitStepsIfNeeded(QueryPlan & query_plan, if (expressions_analysis_result.hasLimitBy()) { - const auto & limit_by_analysis_result = expressions_analysis_result.getLimitBy(); - addExpressionStep(query_plan, limit_by_analysis_result.before_limit_by_actions, "Before LIMIT BY", result_actions_to_execute); + auto & limit_by_analysis_result = expressions_analysis_result.getLimitBy(); + addExpressionStep(query_plan, limit_by_analysis_result.before_limit_by_actions, "Before LIMIT BY", useful_sets); addLimitByStep(query_plan, limit_by_analysis_result, query_node); } @@ -935,12 +933,12 @@ void addPreliminarySortOrDistinctOrLimitStepsIfNeeded(QueryPlan & query_plan, void addWindowSteps(QueryPlan & query_plan, const PlannerContextPtr & planner_context, - const WindowAnalysisResult & window_analysis_result) + WindowAnalysisResult & window_analysis_result) { const auto & query_context = planner_context->getQueryContext(); const auto & settings = query_context->getSettingsRef(); - auto window_descriptions = window_analysis_result.window_descriptions; + auto & window_descriptions = window_analysis_result.window_descriptions; sortWindowDescriptions(window_descriptions); size_t window_descriptions_size = window_descriptions.size(); @@ -1062,47 +1060,15 @@ void addOffsetStep(QueryPlan & query_plan, const QueryAnalysisResult & query_ana } } -void collectSetsFromActionsDAG(const ActionsDAGPtr & dag, std::unordered_set & useful_sets) -{ - for (const auto & node : dag->getNodes()) - { - if (node.column) - { - const IColumn * column = node.column.get(); - if (const auto * column_const = typeid_cast(column)) - column = &column_const->getDataColumn(); - - if (const auto * column_set = typeid_cast(column)) - useful_sets.insert(column_set->getData().get()); - } - - if (node.type == ActionsDAG::ActionType::FUNCTION && node.function_base->getName() == "indexHint") - { - ActionsDAG::NodeRawConstPtrs children; - if (const auto * adaptor = typeid_cast(node.function_base.get())) - { - if (const auto * index_hint = typeid_cast(adaptor->getFunction().get())) - { - collectSetsFromActionsDAG(index_hint->getActions(), useful_sets); - } - } - } - } -} - void addBuildSubqueriesForSetsStepIfNeeded( QueryPlan & query_plan, const SelectQueryOptions & select_query_options, const PlannerContextPtr & planner_context, - const std::vector & result_actions_to_execute) + const UsefulSets & useful_sets) { auto subqueries = planner_context->getPreparedSets().getSubqueries(); - std::unordered_set useful_sets; - for (const auto & actions_to_execute : result_actions_to_execute) - collectSetsFromActionsDAG(actions_to_execute, useful_sets); - - auto predicate = [&useful_sets](const auto & set) { return !useful_sets.contains(set.get()); }; + auto predicate = [&useful_sets](const auto & set) { return !useful_sets.contains(set); }; auto it = std::remove_if(subqueries.begin(), subqueries.end(), std::move(predicate)); subqueries.erase(it, subqueries.end()); @@ -1164,11 +1130,11 @@ void addAdditionalFilterStepIfNeeded(QueryPlan & query_plan, auto fake_table_expression = std::make_shared(std::move(storage), query_context); auto filter_info = buildFilterInfo(additional_result_filter_ast, fake_table_expression, planner_context, std::move(fake_name_set)); - if (!filter_info.actions || !query_plan.isInitialized()) + if (!query_plan.isInitialized()) return; auto filter_step = std::make_unique(query_plan.getCurrentDataStream(), - filter_info.actions, + std::move(filter_info.actions), filter_info.column_name, filter_info.do_remove_column); filter_step->setStepDescription("additional result filter"); @@ -1448,7 +1414,7 @@ void Planner::buildPlanForQueryNode() checkStoragesSupportTransactions(planner_context); const auto & table_filters = planner_context->getGlobalPlannerContext()->filters_for_table_expressions; - if (!select_query_options.only_analyze && !table_filters.empty()) // && top_level) + if (!select_query_options.only_analyze && !table_filters.empty()) { for (auto & [table_node, table_expression_data] : planner_context->getTableExpressionNodeToData()) { @@ -1456,7 +1422,7 @@ void Planner::buildPlanForQueryNode() if (it != table_filters.end()) { const auto & filters = it->second; - table_expression_data.setFilterActions(filters.filter_actions); + table_expression_data.setFilterActions(filters.filter_actions->clone()); table_expression_data.setPrewhereInfo(filters.prewhere_info); } } @@ -1547,15 +1513,15 @@ void Planner::buildPlanForQueryNode() planner_context, query_processing_info); - std::vector result_actions_to_execute = std::move(join_tree_query_plan.actions_dags); + auto useful_sets = std::move(join_tree_query_plan.useful_sets); for (auto & [_, table_expression_data] : planner_context->getTableExpressionNodeToData()) { if (table_expression_data.getPrewhereFilterActions()) - result_actions_to_execute.push_back(table_expression_data.getPrewhereFilterActions()); + appendSetsFromActionsDAG(*table_expression_data.getPrewhereFilterActions(), useful_sets); if (table_expression_data.getRowLevelFilterActions()) - result_actions_to_execute.push_back(table_expression_data.getRowLevelFilterActions()); + appendSetsFromActionsDAG(*table_expression_data.getRowLevelFilterActions(), useful_sets); } if (query_processing_info.isIntermediateStage()) @@ -1566,7 +1532,7 @@ void Planner::buildPlanForQueryNode() planner_context, query_processing_info, query_tree, - result_actions_to_execute); + useful_sets); if (expression_analysis_result.hasAggregation()) { @@ -1578,13 +1544,13 @@ void Planner::buildPlanForQueryNode() if (query_processing_info.isFirstStage()) { if (expression_analysis_result.hasWhere()) - addFilterStep(query_plan, expression_analysis_result.getWhere(), "WHERE", result_actions_to_execute); + addFilterStep(query_plan, expression_analysis_result.getWhere(), "WHERE", useful_sets); if (expression_analysis_result.hasAggregation()) { - const auto & aggregation_analysis_result = expression_analysis_result.getAggregation(); + auto & aggregation_analysis_result = expression_analysis_result.getAggregation(); if (aggregation_analysis_result.before_aggregation_actions) - addExpressionStep(query_plan, aggregation_analysis_result.before_aggregation_actions, "Before GROUP BY", result_actions_to_execute); + addExpressionStep(query_plan, aggregation_analysis_result.before_aggregation_actions, "Before GROUP BY", useful_sets); addAggregationStep(query_plan, aggregation_analysis_result, query_analysis_result, planner_context, select_query_info); } @@ -1601,9 +1567,9 @@ void Planner::buildPlanForQueryNode() * window functions, we can't execute ORDER BY and DISTINCT * now, on shard (first_stage). */ - const auto & window_analysis_result = expression_analysis_result.getWindow(); + auto & window_analysis_result = expression_analysis_result.getWindow(); if (window_analysis_result.before_window_actions) - addExpressionStep(query_plan, window_analysis_result.before_window_actions, "Before WINDOW", result_actions_to_execute); + addExpressionStep(query_plan, window_analysis_result.before_window_actions, "Before WINDOW", useful_sets); } else { @@ -1611,8 +1577,8 @@ void Planner::buildPlanForQueryNode() * Projection expressions, preliminary DISTINCT and before ORDER BY expressions * now, on shards (first_stage). */ - const auto & projection_analysis_result = expression_analysis_result.getProjection(); - addExpressionStep(query_plan, projection_analysis_result.projection_actions, "Projection", result_actions_to_execute); + auto & projection_analysis_result = expression_analysis_result.getProjection(); + addExpressionStep(query_plan, projection_analysis_result.projection_actions, "Projection", useful_sets); if (query_node.isDistinct()) { @@ -1627,8 +1593,8 @@ void Planner::buildPlanForQueryNode() if (expression_analysis_result.hasSort()) { - const auto & sort_analysis_result = expression_analysis_result.getSort(); - addExpressionStep(query_plan, sort_analysis_result.before_order_by_actions, "Before ORDER BY", result_actions_to_execute); + auto & sort_analysis_result = expression_analysis_result.getSort(); + addExpressionStep(query_plan, sort_analysis_result.before_order_by_actions, "Before ORDER BY", useful_sets); } } } @@ -1639,7 +1605,7 @@ void Planner::buildPlanForQueryNode() planner_context, query_processing_info, query_tree, - result_actions_to_execute); + useful_sets); } if (query_processing_info.isSecondStage() || query_processing_info.isFromAggregationState()) @@ -1661,14 +1627,14 @@ void Planner::buildPlanForQueryNode() if (query_node.isGroupByWithTotals()) { - addTotalsHavingStep(query_plan, expression_analysis_result, query_analysis_result, planner_context, query_node, result_actions_to_execute); + addTotalsHavingStep(query_plan, expression_analysis_result, query_analysis_result, planner_context, query_node, useful_sets); having_executed = true; } addCubeOrRollupStepIfNeeded(query_plan, aggregation_analysis_result, query_analysis_result, planner_context, select_query_info, query_node); if (!having_executed && expression_analysis_result.hasHaving()) - addFilterStep(query_plan, expression_analysis_result.getHaving(), "HAVING", result_actions_to_execute); + addFilterStep(query_plan, expression_analysis_result.getHaving(), "HAVING", useful_sets); } if (query_processing_info.isFromAggregationState()) @@ -1681,18 +1647,18 @@ void Planner::buildPlanForQueryNode() { if (expression_analysis_result.hasWindow()) { - const auto & window_analysis_result = expression_analysis_result.getWindow(); + auto & window_analysis_result = expression_analysis_result.getWindow(); if (expression_analysis_result.hasAggregation()) - addExpressionStep(query_plan, window_analysis_result.before_window_actions, "Before window functions", result_actions_to_execute); + addExpressionStep(query_plan, window_analysis_result.before_window_actions, "Before window functions", useful_sets); addWindowSteps(query_plan, planner_context, window_analysis_result); } if (expression_analysis_result.hasQualify()) - addFilterStep(query_plan, expression_analysis_result.getQualify(), "QUALIFY", result_actions_to_execute); + addFilterStep(query_plan, expression_analysis_result.getQualify(), "QUALIFY", useful_sets); - const auto & projection_analysis_result = expression_analysis_result.getProjection(); - addExpressionStep(query_plan, projection_analysis_result.projection_actions, "Projection", result_actions_to_execute); + auto & projection_analysis_result = expression_analysis_result.getProjection(); + addExpressionStep(query_plan, projection_analysis_result.projection_actions, "Projection", useful_sets); if (query_node.isDistinct()) { @@ -1707,8 +1673,8 @@ void Planner::buildPlanForQueryNode() if (expression_analysis_result.hasSort()) { - const auto & sort_analysis_result = expression_analysis_result.getSort(); - addExpressionStep(query_plan, sort_analysis_result.before_order_by_actions, "Before ORDER BY", result_actions_to_execute); + auto & sort_analysis_result = expression_analysis_result.getSort(); + addExpressionStep(query_plan, sort_analysis_result.before_order_by_actions, "Before ORDER BY", useful_sets); } } else @@ -1760,8 +1726,8 @@ void Planner::buildPlanForQueryNode() if (!query_processing_info.isFromAggregationState() && expression_analysis_result.hasLimitBy()) { - const auto & limit_by_analysis_result = expression_analysis_result.getLimitBy(); - addExpressionStep(query_plan, limit_by_analysis_result.before_limit_by_actions, "Before LIMIT BY", result_actions_to_execute); + auto & limit_by_analysis_result = expression_analysis_result.getLimitBy(); + addExpressionStep(query_plan, limit_by_analysis_result.before_limit_by_actions, "Before LIMIT BY", useful_sets); addLimitByStep(query_plan, limit_by_analysis_result, query_node); } @@ -1792,8 +1758,8 @@ void Planner::buildPlanForQueryNode() /// Project names is not done on shards, because initiator will not find columns in blocks if (!query_processing_info.isToAggregationState()) { - const auto & projection_analysis_result = expression_analysis_result.getProjection(); - addExpressionStep(query_plan, projection_analysis_result.project_names_actions, "Project names", result_actions_to_execute); + auto & projection_analysis_result = expression_analysis_result.getProjection(); + addExpressionStep(query_plan, projection_analysis_result.project_names_actions, "Project names", useful_sets); } // For additional_result_filter setting @@ -1801,7 +1767,7 @@ void Planner::buildPlanForQueryNode() } if (!select_query_options.only_analyze) - addBuildSubqueriesForSetsStepIfNeeded(query_plan, select_query_options, planner_context, result_actions_to_execute); + addBuildSubqueriesForSetsStepIfNeeded(query_plan, select_query_options, planner_context, useful_sets); query_node_to_plan_step_mapping[&query_node] = query_plan.getRootNode(); } diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 6fd37df11c6..1960855792c 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -487,33 +487,16 @@ public: return node; } - [[nodiscard]] String addConstantIfNecessary( - const std::string & node_name, const ColumnWithTypeAndName & column, bool always_use_const_column_for_constant_nodes) + const ActionsDAG::Node * addConstantIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column) { - chassert(column.column != nullptr); auto it = node_name_to_node.find(node_name); - if (it != node_name_to_node.end() && (!always_use_const_column_for_constant_nodes || it->second->column)) - return {node_name}; - if (it != node_name_to_node.end()) - { - /// There is a node with this name, but it doesn't have a column - /// This likely happens because we executed the query until WithMergeableState with a const node in the - /// WHERE clause and, as the results of headers are materialized, the column was removed - /// Let's add a new column and keep this - String dupped_name{node_name + "_dupped"}; - if (node_name_to_node.find(dupped_name) != node_name_to_node.end()) - return dupped_name; - - const auto * node = &actions_dag.addColumn(column); - node_name_to_node[dupped_name] = node; - return dupped_name; - } + return it->second; const auto * node = &actions_dag.addColumn(column); node_name_to_node[node->result_name] = node; - return {node_name}; + return node; } template @@ -542,7 +525,7 @@ public: } private: - std::unordered_map node_name_to_node; + std::unordered_map node_name_to_node; ActionsDAG & actions_dag; QueryTreeNodePtr scope_node; }; @@ -550,11 +533,9 @@ private: class PlannerActionsVisitorImpl { public: - PlannerActionsVisitorImpl( - ActionsDAG & actions_dag, + PlannerActionsVisitorImpl(ActionsDAG & actions_dag, const PlannerContextPtr & planner_context_, - bool use_column_identifier_as_action_node_name_, - bool always_use_const_column_for_constant_nodes_); + bool use_column_identifier_as_action_node_name_); ActionsDAG::NodeRawConstPtrs visit(QueryTreeNodePtr expression_node); @@ -614,18 +595,14 @@ private: const PlannerContextPtr planner_context; ActionNodeNameHelper action_node_name_helper; bool use_column_identifier_as_action_node_name; - bool always_use_const_column_for_constant_nodes; }; -PlannerActionsVisitorImpl::PlannerActionsVisitorImpl( - ActionsDAG & actions_dag, +PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAG & actions_dag, const PlannerContextPtr & planner_context_, - bool use_column_identifier_as_action_node_name_, - bool always_use_const_column_for_constant_nodes_) + bool use_column_identifier_as_action_node_name_) : planner_context(planner_context_) , action_node_name_helper(node_to_node_name, *planner_context, use_column_identifier_as_action_node_name_) , use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_) - , always_use_const_column_for_constant_nodes(always_use_const_column_for_constant_nodes_) { actions_stack.emplace_back(actions_dag, nullptr); } @@ -748,16 +725,17 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi column.type = constant_type; column.column = column.type->createColumnConst(1, constant_literal); - String final_name = actions_stack[0].addConstantIfNecessary(constant_node_name, column, always_use_const_column_for_constant_nodes); + actions_stack[0].addConstantIfNecessary(constant_node_name, column); size_t actions_stack_size = actions_stack.size(); for (size_t i = 1; i < actions_stack_size; ++i) { auto & actions_stack_node = actions_stack[i]; - actions_stack_node.addInputConstantColumnIfNecessary(final_name, column); + actions_stack_node.addInputConstantColumnIfNecessary(constant_node_name, column); } - return {final_name, Levels(0)}; + return {constant_node_name, Levels(0)}; + } PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitLambda(const QueryTreeNodePtr & node) @@ -781,15 +759,15 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi lambda_arguments_names_and_types.emplace_back(lambda_argument_name, std::move(lambda_argument_type)); } - auto lambda_actions_dag = std::make_shared(); - actions_stack.emplace_back(*lambda_actions_dag, node); + ActionsDAG lambda_actions_dag; + actions_stack.emplace_back(lambda_actions_dag, node); auto [lambda_expression_node_name, levels] = visitImpl(lambda_node.getExpression()); - lambda_actions_dag->getOutputs().push_back(actions_stack.back().getNodeOrThrow(lambda_expression_node_name)); - lambda_actions_dag->removeUnusedActions(Names(1, lambda_expression_node_name)); + lambda_actions_dag.getOutputs().push_back(actions_stack.back().getNodeOrThrow(lambda_expression_node_name)); + lambda_actions_dag.removeUnusedActions(Names(1, lambda_expression_node_name)); auto expression_actions_settings = ExpressionActionsSettings::fromContext(planner_context->getQueryContext(), CompileExpressions::yes); - auto lambda_actions = std::make_shared(lambda_actions_dag, expression_actions_settings); + auto lambda_actions = std::make_shared(std::move(lambda_actions_dag), expression_actions_settings); Names captured_column_names; ActionsDAG::NodeRawConstPtrs lambda_children; @@ -886,16 +864,16 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma else column.column = std::move(column_set); - String final_name = actions_stack[0].addConstantIfNecessary(column.name, column, always_use_const_column_for_constant_nodes); + actions_stack[0].addConstantIfNecessary(column.name, column); size_t actions_stack_size = actions_stack.size(); for (size_t i = 1; i < actions_stack_size; ++i) { auto & actions_stack_node = actions_stack[i]; - actions_stack_node.addInputConstantColumnIfNecessary(final_name, column); + actions_stack_node.addInputConstantColumnIfNecessary(column.name, column); } - return {final_name, Levels(0)}; + return {column.name, Levels(0)}; } PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitIndexHintFunction(const QueryTreeNodePtr & node) @@ -903,14 +881,14 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi const auto & function_node = node->as(); auto function_node_name = action_node_name_helper.calculateActionNodeName(node); - auto index_hint_actions_dag = std::make_shared(); - auto & index_hint_actions_dag_outputs = index_hint_actions_dag->getOutputs(); + ActionsDAG index_hint_actions_dag; + auto & index_hint_actions_dag_outputs = index_hint_actions_dag.getOutputs(); std::unordered_set index_hint_actions_dag_output_node_names; PlannerActionsVisitor actions_visitor(planner_context); for (const auto & argument : function_node.getArguments()) { - auto index_hint_argument_expression_dag_nodes = actions_visitor.visit(*index_hint_actions_dag, argument); + auto index_hint_argument_expression_dag_nodes = actions_visitor.visit(index_hint_actions_dag, argument); for (auto & expression_dag_node : index_hint_argument_expression_dag_nodes) { @@ -1032,19 +1010,14 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi } -PlannerActionsVisitor::PlannerActionsVisitor( - const PlannerContextPtr & planner_context_, - bool use_column_identifier_as_action_node_name_, - bool always_use_const_column_for_constant_nodes_) +PlannerActionsVisitor::PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_) : planner_context(planner_context_) , use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_) - , always_use_const_column_for_constant_nodes(always_use_const_column_for_constant_nodes_) {} ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAG & actions_dag, QueryTreeNodePtr expression_node) { - PlannerActionsVisitorImpl actions_visitor_impl( - actions_dag, planner_context, use_column_identifier_as_action_node_name, always_use_const_column_for_constant_nodes); + PlannerActionsVisitorImpl actions_visitor_impl(actions_dag, planner_context, use_column_identifier_as_action_node_name); return actions_visitor_impl.visit(expression_node); } diff --git a/src/Planner/PlannerActionsVisitor.h b/src/Planner/PlannerActionsVisitor.h index 1dbd149bc4b..6bb32047327 100644 --- a/src/Planner/PlannerActionsVisitor.h +++ b/src/Planner/PlannerActionsVisitor.h @@ -27,17 +27,11 @@ using PlannerContextPtr = std::shared_ptr; * During actions build, there is special handling for following functions: * 1. Aggregate functions are added in actions dag as INPUT nodes. Aggregate functions arguments are not added. * 2. For function `in` and its variants, already collected sets from planner context are used. - * 3. When building actions that use CONSTANT nodes, by default we ignore pre-existing INPUTs if those don't have - * a column (a const column always has a column). This is for compatibility with previous headers. We disable this - * behaviour when we explicitly want to override CONSTANT nodes with the input (resolving InterpolateNode for example) */ class PlannerActionsVisitor { public: - explicit PlannerActionsVisitor( - const PlannerContextPtr & planner_context_, - bool use_column_identifier_as_action_node_name_ = true, - bool always_use_const_column_for_constant_nodes_ = true); + explicit PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_ = true); /** Add actions necessary to calculate expression node into expression dag. * Necessary actions are not added in actions dag output. @@ -48,7 +42,6 @@ public: private: const PlannerContextPtr planner_context; bool use_column_identifier_as_action_node_name = true; - bool always_use_const_column_for_constant_nodes = true; }; /** Calculate query tree expression node action dag name and add them into node to name map. diff --git a/src/Planner/PlannerContext.h b/src/Planner/PlannerContext.h index 418240fa34e..f35772ef7c0 100644 --- a/src/Planner/PlannerContext.h +++ b/src/Planner/PlannerContext.h @@ -25,7 +25,7 @@ class TableNode; struct FiltersForTableExpression { - ActionsDAGPtr filter_actions; + std::optional filter_actions; PrewhereInfoPtr prewhere_info; }; diff --git a/src/Planner/PlannerExpressionAnalysis.h b/src/Planner/PlannerExpressionAnalysis.h index 820df7131a7..283fcac7aba 100644 --- a/src/Planner/PlannerExpressionAnalysis.h +++ b/src/Planner/PlannerExpressionAnalysis.h @@ -64,7 +64,7 @@ public: : projection_analysis_result(std::move(projection_analysis_result_)) {} - const ProjectionAnalysisResult & getProjection() const + ProjectionAnalysisResult & getProjection() { return projection_analysis_result; } @@ -74,7 +74,7 @@ public: return where_analysis_result.filter_actions != nullptr; } - const FilterAnalysisResult & getWhere() const + FilterAnalysisResult & getWhere() { return where_analysis_result; } @@ -89,7 +89,7 @@ public: return !aggregation_analysis_result.aggregation_keys.empty() || !aggregation_analysis_result.aggregate_descriptions.empty(); } - const AggregationAnalysisResult & getAggregation() const + AggregationAnalysisResult & getAggregation() { return aggregation_analysis_result; } @@ -104,7 +104,7 @@ public: return having_analysis_result.filter_actions != nullptr; } - const FilterAnalysisResult & getHaving() const + FilterAnalysisResult & getHaving() { return having_analysis_result; } @@ -119,7 +119,7 @@ public: return !window_analysis_result.window_descriptions.empty(); } - const WindowAnalysisResult & getWindow() const + WindowAnalysisResult & getWindow() { return window_analysis_result; } @@ -134,7 +134,7 @@ public: return qualify_analysis_result.filter_actions != nullptr; } - const FilterAnalysisResult & getQualify() const + FilterAnalysisResult & getQualify() { return qualify_analysis_result; } @@ -149,7 +149,7 @@ public: return sort_analysis_result.before_order_by_actions != nullptr; } - const SortAnalysisResult & getSort() const + SortAnalysisResult & getSort() { return sort_analysis_result; } @@ -164,7 +164,7 @@ public: return limit_by_analysis_result.before_limit_by_actions != nullptr; } - const LimitByAnalysisResult & getLimitBy() const + LimitByAnalysisResult & getLimitBy() { return limit_by_analysis_result; } diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index ab599331a9d..a3db0395ccc 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -77,7 +77,6 @@ namespace ErrorCodes extern const int INVALID_JOIN_ON_EXPRESSION; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; - extern const int SYNTAX_ERROR; extern const int ACCESS_DENIED; extern const int PARAMETER_OUT_OF_BOUND; extern const int TOO_MANY_COLUMNS; @@ -438,7 +437,7 @@ void updatePrewhereOutputsIfNeeded(SelectQueryInfo & table_expression_query_info std::unordered_set required_output_nodes; - for (const auto * input : prewhere_actions->getInputs()) + for (const auto * input : prewhere_actions.getInputs()) { if (required_columns.contains(input->result_name)) required_output_nodes.insert(input); @@ -447,7 +446,7 @@ void updatePrewhereOutputsIfNeeded(SelectQueryInfo & table_expression_query_info if (required_output_nodes.empty()) return; - auto & prewhere_outputs = prewhere_actions->getOutputs(); + auto & prewhere_outputs = prewhere_actions.getOutputs(); for (const auto & output : prewhere_outputs) { auto required_output_node_it = required_output_nodes.find(output); @@ -460,7 +459,7 @@ void updatePrewhereOutputsIfNeeded(SelectQueryInfo & table_expression_query_info prewhere_outputs.insert(prewhere_outputs.end(), required_output_nodes.begin(), required_output_nodes.end()); } -FilterDAGInfo buildRowPolicyFilterIfNeeded(const StoragePtr & storage, +std::optional buildRowPolicyFilterIfNeeded(const StoragePtr & storage, SelectQueryInfo & table_expression_query_info, PlannerContextPtr & planner_context, std::set & used_row_policies) @@ -481,7 +480,7 @@ FilterDAGInfo buildRowPolicyFilterIfNeeded(const StoragePtr & storage, return buildFilterInfo(row_policy_filter->expression, table_expression_query_info.table_expression, planner_context); } -FilterDAGInfo buildCustomKeyFilterIfNeeded(const StoragePtr & storage, +std::optional buildCustomKeyFilterIfNeeded(const StoragePtr & storage, SelectQueryInfo & table_expression_query_info, PlannerContextPtr & planner_context) { @@ -515,7 +514,7 @@ FilterDAGInfo buildCustomKeyFilterIfNeeded(const StoragePtr & storage, } /// Apply filters from additional_table_filters setting -FilterDAGInfo buildAdditionalFiltersIfNeeded(const StoragePtr & storage, +std::optional buildAdditionalFiltersIfNeeded(const StoragePtr & storage, const String & table_expression_alias, SelectQueryInfo & table_expression_query_info, PlannerContextPtr & planner_context) @@ -591,21 +590,21 @@ UInt64 mainQueryNodeBlockSizeByLimit(const SelectQueryInfo & select_query_info) } std::unique_ptr createComputeAliasColumnsStep( - const std::unordered_map & alias_column_expressions, const DataStream & current_data_stream) + std::unordered_map & alias_column_expressions, const DataStream & current_data_stream) { - ActionsDAGPtr merged_alias_columns_actions_dag = std::make_shared(current_data_stream.header.getColumnsWithTypeAndName()); - ActionsDAG::NodeRawConstPtrs action_dag_outputs = merged_alias_columns_actions_dag->getInputs(); + ActionsDAG merged_alias_columns_actions_dag(current_data_stream.header.getColumnsWithTypeAndName()); + ActionsDAG::NodeRawConstPtrs action_dag_outputs = merged_alias_columns_actions_dag.getInputs(); - for (const auto & [column_name, alias_column_actions_dag] : alias_column_expressions) + for (auto & [column_name, alias_column_actions_dag] : alias_column_expressions) { - const auto & current_outputs = alias_column_actions_dag->getOutputs(); + const auto & current_outputs = alias_column_actions_dag.getOutputs(); action_dag_outputs.insert(action_dag_outputs.end(), current_outputs.begin(), current_outputs.end()); - merged_alias_columns_actions_dag->mergeNodes(std::move(*alias_column_actions_dag)); + merged_alias_columns_actions_dag.mergeNodes(std::move(alias_column_actions_dag)); } for (const auto * output_node : action_dag_outputs) - merged_alias_columns_actions_dag->addOrReplaceInOutputs(*output_node); - merged_alias_columns_actions_dag->removeUnusedActions(false); + merged_alias_columns_actions_dag.addOrReplaceInOutputs(*output_node); + merged_alias_columns_actions_dag.removeUnusedActions(false); auto alias_column_step = std::make_unique(current_data_stream, std::move(merged_alias_columns_actions_dag)); alias_column_step->setStepDescription("Compute alias columns"); @@ -648,7 +647,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres auto table_expression_query_info = select_query_info; table_expression_query_info.table_expression = table_expression; - table_expression_query_info.filter_actions_dag = table_expression_data.getFilterActions(); + if (const auto & filter_actions = table_expression_data.getFilterActions()) + table_expression_query_info.filter_actions_dag = std::make_shared(filter_actions->clone()); table_expression_query_info.current_table_chosen_for_reading_with_parallel_replicas = table_node == planner_context->getGlobalPlannerContext()->parallel_replicas_table; @@ -779,7 +779,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (prewhere_actions) { prewhere_info = std::make_shared(); - prewhere_info->prewhere_actions = prewhere_actions; + prewhere_info->prewhere_actions = prewhere_actions->clone(); prewhere_info->prewhere_column_name = prewhere_actions->getOutputs().at(0)->result_name; prewhere_info->remove_prewhere_column = true; prewhere_info->need_filter = true; @@ -790,11 +790,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres const auto & columns_names = table_expression_data.getColumnNames(); std::vector> where_filters; - const auto add_filter = [&](const FilterDAGInfo & filter_info, std::string description) + const auto add_filter = [&](FilterDAGInfo & filter_info, std::string description) { - if (!filter_info.actions) - return; - bool is_final = table_expression_query_info.table_expression_modifiers && table_expression_query_info.table_expression_modifiers->hasFinal(); bool optimize_move_to_prewhere @@ -804,46 +801,45 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (storage->canMoveConditionsToPrewhere() && optimize_move_to_prewhere && (!supported_prewhere_columns || supported_prewhere_columns->contains(filter_info.column_name))) { if (!prewhere_info) - prewhere_info = std::make_shared(); - - if (!prewhere_info->prewhere_actions) { - prewhere_info->prewhere_actions = filter_info.actions; + prewhere_info = std::make_shared(); + prewhere_info->prewhere_actions = std::move(filter_info.actions); prewhere_info->prewhere_column_name = filter_info.column_name; prewhere_info->remove_prewhere_column = filter_info.do_remove_column; prewhere_info->need_filter = true; } else if (!prewhere_info->row_level_filter) { - prewhere_info->row_level_filter = filter_info.actions; + prewhere_info->row_level_filter = std::move(filter_info.actions); prewhere_info->row_level_column_name = filter_info.column_name; prewhere_info->need_filter = true; } else { - where_filters.emplace_back(filter_info, std::move(description)); + where_filters.emplace_back(std::move(filter_info), std::move(description)); } } else { - where_filters.emplace_back(filter_info, std::move(description)); + where_filters.emplace_back(std::move(filter_info), std::move(description)); } }; auto row_policy_filter_info = buildRowPolicyFilterIfNeeded(storage, table_expression_query_info, planner_context, used_row_policies); - add_filter(row_policy_filter_info, "Row-level security filter"); - if (row_policy_filter_info.actions) - table_expression_data.setRowLevelFilterActions(row_policy_filter_info.actions); + if (row_policy_filter_info) + { + table_expression_data.setRowLevelFilterActions(row_policy_filter_info->actions.clone()); + add_filter(*row_policy_filter_info, "Row-level security filter"); + } if (query_context->canUseParallelReplicasCustomKey()) { if (settings.parallel_replicas_count > 1) { - auto parallel_replicas_custom_key_filter_info - = buildCustomKeyFilterIfNeeded(storage, table_expression_query_info, planner_context); - add_filter(parallel_replicas_custom_key_filter_info, "Parallel replicas custom key filter"); + if (auto parallel_replicas_custom_key_filter_info= buildCustomKeyFilterIfNeeded(storage, table_expression_query_info, planner_context)) + add_filter(*parallel_replicas_custom_key_filter_info, "Parallel replicas custom key filter"); } else if (auto * distributed = typeid_cast(storage.get()); distributed && query_context->canUseParallelReplicasCustomKeyForCluster(*distributed->getCluster())) @@ -858,9 +854,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres } const auto & table_expression_alias = table_expression->getOriginalAlias(); - auto additional_filters_info - = buildAdditionalFiltersIfNeeded(storage, table_expression_alias, table_expression_query_info, planner_context); - add_filter(additional_filters_info, "additional filter"); + if (auto additional_filters_info = buildAdditionalFiltersIfNeeded(storage, table_expression_alias, table_expression_query_info, planner_context)) + add_filter(*additional_filters_info, "additional filter"); from_stage = storage->getQueryProcessingStage( query_context, select_query_options.to_stage, storage_snapshot, table_expression_query_info); @@ -998,22 +993,20 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres } } - const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); + auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); if (!alias_column_expressions.empty() && query_plan.isInitialized() && from_stage == QueryProcessingStage::FetchColumns) { auto alias_column_step = createComputeAliasColumnsStep(alias_column_expressions, query_plan.getCurrentDataStream()); query_plan.addStep(std::move(alias_column_step)); } - for (const auto & filter_info_and_description : where_filters) + for (auto && [filter_info, description] : where_filters) { - const auto & [filter_info, description] = filter_info_and_description; if (query_plan.isInitialized() && - from_stage == QueryProcessingStage::FetchColumns && - filter_info.actions) + from_stage == QueryProcessingStage::FetchColumns) { auto filter_step = std::make_unique(query_plan.getCurrentDataStream(), - filter_info.actions, + std::move(filter_info.actions), filter_info.column_name, filter_info.do_remove_column); filter_step->setStepDescription(description); @@ -1089,7 +1082,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres query_plan = std::move(subquery_planner).extractQueryPlan(); } - const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); + auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); if (!alias_column_expressions.empty() && query_plan.isInitialized() && from_stage == QueryProcessingStage::FetchColumns) { auto alias_column_step = createComputeAliasColumnsStep(alias_column_expressions, query_plan.getCurrentDataStream()); @@ -1104,21 +1097,21 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (from_stage == QueryProcessingStage::FetchColumns) { - auto rename_actions_dag = std::make_shared(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + ActionsDAG rename_actions_dag(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs updated_actions_dag_outputs; - for (auto & output_node : rename_actions_dag->getOutputs()) + for (auto & output_node : rename_actions_dag.getOutputs()) { const auto * column_identifier = table_expression_data.getColumnIdentifierOrNull(output_node->result_name); if (!column_identifier) continue; - updated_actions_dag_outputs.push_back(&rename_actions_dag->addAlias(*output_node, *column_identifier)); + updated_actions_dag_outputs.push_back(&rename_actions_dag.addAlias(*output_node, *column_identifier)); } - rename_actions_dag->getOutputs() = std::move(updated_actions_dag_outputs); + rename_actions_dag.getOutputs() = std::move(updated_actions_dag_outputs); - auto rename_step = std::make_unique(query_plan.getCurrentDataStream(), rename_actions_dag); + auto rename_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(rename_actions_dag)); rename_step->setStepDescription("Change column names to column identifiers"); query_plan.addStep(std::move(rename_step)); } @@ -1158,9 +1151,9 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextPtr & planner_context, const FunctionOverloadResolverPtr & to_nullable_function) { - auto cast_actions_dag = std::make_shared(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName()); + ActionsDAG cast_actions_dag(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName()); - for (auto & output_node : cast_actions_dag->getOutputs()) + for (auto & output_node : cast_actions_dag.getOutputs()) { if (planner_context->getGlobalPlannerContext()->hasColumnIdentifier(output_node->result_name)) { @@ -1169,11 +1162,11 @@ void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextP type_to_check = type_to_check_low_cardinality->getDictionaryType(); if (type_to_check->canBeInsideNullable()) - output_node = &cast_actions_dag->addFunction(to_nullable_function, {output_node}, output_node->result_name); + output_node = &cast_actions_dag.addFunction(to_nullable_function, {output_node}, output_node->result_name); } } - cast_actions_dag->appendInputsForUnusedColumns(plan_to_add_cast.getCurrentDataStream().header); + cast_actions_dag.appendInputsForUnusedColumns(plan_to_add_cast.getCurrentDataStream().header); auto cast_join_columns_step = std::make_unique(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag)); cast_join_columns_step->setStepDescription("Cast JOIN columns to Nullable"); plan_to_add_cast.addStep(std::move(cast_join_columns_step)); @@ -1219,14 +1212,16 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ join_table_expression, planner_context); - join_clauses_and_actions.left_join_expressions_actions->appendInputsForUnusedColumns(left_plan.getCurrentDataStream().header); - auto left_join_expressions_actions_step = std::make_unique(left_plan.getCurrentDataStream(), join_clauses_and_actions.left_join_expressions_actions); + join_clauses_and_actions.left_join_expressions_actions.appendInputsForUnusedColumns(left_plan.getCurrentDataStream().header); + auto left_join_expressions_actions_step = std::make_unique(left_plan.getCurrentDataStream(), std::move(join_clauses_and_actions.left_join_expressions_actions)); left_join_expressions_actions_step->setStepDescription("JOIN actions"); + appendSetsFromActionsDAG(left_join_expressions_actions_step->getExpression(), left_join_tree_query_plan.useful_sets); left_plan.addStep(std::move(left_join_expressions_actions_step)); - join_clauses_and_actions.right_join_expressions_actions->appendInputsForUnusedColumns(right_plan.getCurrentDataStream().header); - auto right_join_expressions_actions_step = std::make_unique(right_plan.getCurrentDataStream(), join_clauses_and_actions.right_join_expressions_actions); + join_clauses_and_actions.right_join_expressions_actions.appendInputsForUnusedColumns(right_plan.getCurrentDataStream().header); + auto right_join_expressions_actions_step = std::make_unique(right_plan.getCurrentDataStream(), std::move(join_clauses_and_actions.right_join_expressions_actions)); right_join_expressions_actions_step->setStepDescription("JOIN actions"); + appendSetsFromActionsDAG(right_join_expressions_actions_step->getExpression(), right_join_tree_query_plan.useful_sets); right_plan.addStep(std::move(right_join_expressions_actions_step)); } @@ -1264,19 +1259,19 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ auto join_cast_plan_output_nodes = [&](QueryPlan & plan_to_add_cast, std::unordered_map & plan_column_name_to_cast_type) { - auto cast_actions_dag = std::make_shared(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName()); + ActionsDAG cast_actions_dag(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName()); - for (auto & output_node : cast_actions_dag->getOutputs()) + for (auto & output_node : cast_actions_dag.getOutputs()) { auto it = plan_column_name_to_cast_type.find(output_node->result_name); if (it == plan_column_name_to_cast_type.end()) continue; const auto & cast_type = it->second; - output_node = &cast_actions_dag->addCast(*output_node, cast_type, output_node->result_name); + output_node = &cast_actions_dag.addCast(*output_node, cast_type, output_node->result_name); } - cast_actions_dag->appendInputsForUnusedColumns(plan_to_add_cast.getCurrentDataStream().header); + cast_actions_dag.appendInputsForUnusedColumns(plan_to_add_cast.getCurrentDataStream().header); auto cast_join_columns_step = std::make_unique(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag)); cast_join_columns_step->setStepDescription("Cast JOIN USING columns"); @@ -1397,12 +1392,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ { if (!join_clause.hasASOF()) throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, - "JOIN {} no inequality in ASOF JOIN ON section.", - join_node.formatASTForErrorMessage()); - - if (table_join_clause.key_names_left.size() <= 1) - throw Exception(ErrorCodes::SYNTAX_ERROR, - "JOIN {} ASOF join needs at least one equi-join column", + "JOIN {} no inequality in ASOF JOIN ON section", join_node.formatASTForErrorMessage()); } @@ -1424,8 +1414,10 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ { ExpressionActionsPtr & mixed_join_expression = table_join->getMixedJoinExpression(); mixed_join_expression = std::make_shared( - join_clauses_and_actions.mixed_join_expressions_actions, + std::move(*join_clauses_and_actions.mixed_join_expressions_actions), ExpressionActionsSettings::fromContext(planner_context->getQueryContext())); + + appendSetsFromActionsDAG(mixed_join_expression->getActionsDAG(), left_join_tree_query_plan.useful_sets); } } else if (join_node.isUsingJoinExpression()) @@ -1471,7 +1463,8 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ auto result_plan = QueryPlan(); - if (join_algorithm->isFilled()) + bool is_filled_join = join_algorithm->isFilled(); + if (is_filled_join) { auto filled_join_step = std::make_unique( left_plan.getCurrentDataStream(), @@ -1524,7 +1517,9 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ { const auto & join_clause = table_join->getOnlyClause(); - bool kind_allows_filtering = isInner(join_kind) || isLeft(join_kind) || isRight(join_kind); + bool join_type_allows_filtering = (join_strictness == JoinStrictness::All || join_strictness == JoinStrictness::Any) + && (isInner(join_kind) || isLeft(join_kind) || isRight(join_kind)); + auto has_non_const = [](const Block & block, const auto & keys) { @@ -1544,7 +1539,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ bool has_non_const_keys = has_non_const(left_plan.getCurrentDataStream().header, join_clause.key_names_left) && has_non_const(right_plan.getCurrentDataStream().header, join_clause.key_names_right); - if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering && has_non_const_keys) + if (settings.max_rows_in_set_to_optimize_join > 0 && join_type_allows_filtering && has_non_const_keys) { auto * left_set = add_create_set(left_plan, join_clause.key_names_left, JoinTableSide::Left); auto * right_set = add_create_set(right_plan, join_clause.key_names_right, JoinTableSide::Right); @@ -1578,12 +1573,12 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ result_plan.unitePlans(std::move(join_step), {std::move(plans)}); } - auto drop_unused_columns_after_join_actions_dag = std::make_shared(result_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + ActionsDAG drop_unused_columns_after_join_actions_dag(result_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs drop_unused_columns_after_join_actions_dag_updated_outputs; std::unordered_set drop_unused_columns_after_join_actions_dag_updated_outputs_names; std::optional first_skipped_column_node_index; - auto & drop_unused_columns_after_join_actions_dag_outputs = drop_unused_columns_after_join_actions_dag->getOutputs(); + auto & drop_unused_columns_after_join_actions_dag_outputs = drop_unused_columns_after_join_actions_dag.getOutputs(); size_t drop_unused_columns_after_join_actions_dag_outputs_size = drop_unused_columns_after_join_actions_dag_outputs.size(); for (size_t i = 0; i < drop_unused_columns_after_join_actions_dag_outputs_size; ++i) @@ -1622,15 +1617,10 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ for (const auto & right_join_tree_query_plan_row_policy : right_join_tree_query_plan.used_row_policies) left_join_tree_query_plan.used_row_policies.insert(right_join_tree_query_plan_row_policy); - /// Collect all required actions dags in `left_join_tree_query_plan.actions_dags` - for (auto && action_dag : right_join_tree_query_plan.actions_dags) - left_join_tree_query_plan.actions_dags.emplace_back(action_dag); - if (join_clauses_and_actions.left_join_expressions_actions) - left_join_tree_query_plan.actions_dags.emplace_back(std::move(join_clauses_and_actions.left_join_expressions_actions)); - if (join_clauses_and_actions.right_join_expressions_actions) - left_join_tree_query_plan.actions_dags.emplace_back(std::move(join_clauses_and_actions.right_join_expressions_actions)); - if (join_clauses_and_actions.mixed_join_expressions_actions) - left_join_tree_query_plan.actions_dags.push_back(join_clauses_and_actions.mixed_join_expressions_actions); + /// Collect all required actions sets in `left_join_tree_query_plan.useful_sets` + if (!is_filled_join) + for (const auto & useful_set : right_join_tree_query_plan.useful_sets) + left_join_tree_query_plan.useful_sets.insert(useful_set); auto mapping = std::move(left_join_tree_query_plan.query_node_to_plan_step_mapping); auto & r_mapping = right_join_tree_query_plan.query_node_to_plan_step_mapping; @@ -1640,7 +1630,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ .query_plan = std::move(result_plan), .from_stage = QueryProcessingStage::FetchColumns, .used_row_policies = std::move(left_join_tree_query_plan.used_row_policies), - .actions_dags = std::move(left_join_tree_query_plan.actions_dags), + .useful_sets = std::move(left_join_tree_query_plan.useful_sets), .query_node_to_plan_step_mapping = std::move(mapping), }; } @@ -1660,7 +1650,7 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_ auto plan = std::move(join_tree_query_plan.query_plan); auto plan_output_columns = plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); - ActionsDAGPtr array_join_action_dag = std::make_shared(plan_output_columns); + ActionsDAG array_join_action_dag(plan_output_columns); PlannerActionsVisitor actions_visitor(planner_context); std::unordered_set array_join_expressions_output_nodes; @@ -1671,29 +1661,28 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_ array_join_column_names.insert(array_join_column_identifier); auto & array_join_expression_column = array_join_expression->as(); - auto expression_dag_index_nodes = actions_visitor.visit(*array_join_action_dag, array_join_expression_column.getExpressionOrThrow()); + auto expression_dag_index_nodes = actions_visitor.visit(array_join_action_dag, array_join_expression_column.getExpressionOrThrow()); for (auto & expression_dag_index_node : expression_dag_index_nodes) { - const auto * array_join_column_node = &array_join_action_dag->addAlias(*expression_dag_index_node, array_join_column_identifier); - array_join_action_dag->getOutputs().push_back(array_join_column_node); + const auto * array_join_column_node = &array_join_action_dag.addAlias(*expression_dag_index_node, array_join_column_identifier); + array_join_action_dag.getOutputs().push_back(array_join_column_node); array_join_expressions_output_nodes.insert(array_join_column_node->result_name); } } - array_join_action_dag->appendInputsForUnusedColumns(plan.getCurrentDataStream().header); + array_join_action_dag.appendInputsForUnusedColumns(plan.getCurrentDataStream().header); - join_tree_query_plan.actions_dags.push_back(array_join_action_dag); - - auto array_join_actions = std::make_unique(plan.getCurrentDataStream(), array_join_action_dag); + auto array_join_actions = std::make_unique(plan.getCurrentDataStream(), std::move(array_join_action_dag)); array_join_actions->setStepDescription("ARRAY JOIN actions"); + appendSetsFromActionsDAG(array_join_actions->getExpression(), join_tree_query_plan.useful_sets); plan.addStep(std::move(array_join_actions)); - auto drop_unused_columns_before_array_join_actions_dag = std::make_shared(plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + ActionsDAG drop_unused_columns_before_array_join_actions_dag(plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs drop_unused_columns_before_array_join_actions_dag_updated_outputs; std::unordered_set drop_unused_columns_before_array_join_actions_dag_updated_outputs_names; - auto & drop_unused_columns_before_array_join_actions_dag_outputs = drop_unused_columns_before_array_join_actions_dag->getOutputs(); + auto & drop_unused_columns_before_array_join_actions_dag_outputs = drop_unused_columns_before_array_join_actions_dag.getOutputs(); size_t drop_unused_columns_before_array_join_actions_dag_outputs_size = drop_unused_columns_before_array_join_actions_dag_outputs.size(); for (size_t i = 0; i < drop_unused_columns_before_array_join_actions_dag_outputs_size; ++i) @@ -1727,7 +1716,7 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_ .query_plan = std::move(plan), .from_stage = QueryProcessingStage::FetchColumns, .used_row_policies = std::move(join_tree_query_plan.used_row_policies), - .actions_dags = std::move(join_tree_query_plan.actions_dags), + .useful_sets = std::move(join_tree_query_plan.useful_sets), .query_node_to_plan_step_mapping = std::move(join_tree_query_plan.query_node_to_plan_step_mapping), }; } diff --git a/src/Planner/PlannerJoinTree.h b/src/Planner/PlannerJoinTree.h index 9110b2bfef9..259622b1d50 100644 --- a/src/Planner/PlannerJoinTree.h +++ b/src/Planner/PlannerJoinTree.h @@ -11,12 +11,14 @@ namespace DB { +using UsefulSets = std::unordered_set; + struct JoinTreeQueryPlan { QueryPlan query_plan; QueryProcessingStage::Enum from_stage; std::set used_row_policies{}; - std::vector actions_dags{}; + UsefulSets useful_sets{}; std::unordered_map query_node_to_plan_step_mapping{}; }; diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index d9360a58240..5acff9dac82 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -180,13 +180,13 @@ std::set extractJoinTableSidesFromExpression(//const ActionsDAG:: } const ActionsDAG::Node * appendExpression( - ActionsDAGPtr & dag, + ActionsDAG & dag, const QueryTreeNodePtr & expression, const PlannerContextPtr & planner_context, const JoinNode & join_node) { PlannerActionsVisitor join_expression_visitor(planner_context); - auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(*dag, expression); + auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(dag, expression); if (join_expression_dag_node_raw_pointers.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "JOIN {} ON clause contains multiple expressions", @@ -196,9 +196,9 @@ const ActionsDAG::Node * appendExpression( } void buildJoinClause( - ActionsDAGPtr & left_dag, - ActionsDAGPtr & right_dag, - ActionsDAGPtr & mixed_dag, + ActionsDAG & left_dag, + ActionsDAG & right_dag, + ActionsDAG & mixed_dag, const PlannerContextPtr & planner_context, const QueryTreeNodePtr & join_expression, const TableExpressionSet & left_table_expressions, @@ -379,8 +379,8 @@ JoinClausesAndActions buildJoinClausesAndActions( const JoinNode & join_node, const PlannerContextPtr & planner_context) { - ActionsDAGPtr left_join_actions = std::make_shared(left_table_expression_columns); - ActionsDAGPtr right_join_actions = std::make_shared(right_table_expression_columns); + ActionsDAG left_join_actions(left_table_expression_columns); + ActionsDAG right_join_actions(right_table_expression_columns); ColumnsWithTypeAndName mixed_table_expression_columns; for (const auto & left_column : left_table_expression_columns) { @@ -390,7 +390,7 @@ JoinClausesAndActions buildJoinClausesAndActions( { mixed_table_expression_columns.push_back(right_column); } - ActionsDAGPtr mixed_join_actions = std::make_shared(mixed_table_expression_columns); + ActionsDAG mixed_join_actions(mixed_table_expression_columns); /** It is possible to have constant value in JOIN ON section, that we need to ignore during DAG construction. * If we do not ignore it, this function will be replaced by underlying constant. @@ -501,12 +501,12 @@ JoinClausesAndActions buildJoinClausesAndActions( { const ActionsDAG::Node * dag_filter_condition_node = nullptr; if (left_filter_condition_nodes.size() > 1) - dag_filter_condition_node = &left_join_actions->addFunction(and_function, left_filter_condition_nodes, {}); + dag_filter_condition_node = &left_join_actions.addFunction(and_function, left_filter_condition_nodes, {}); else dag_filter_condition_node = left_filter_condition_nodes[0]; join_clause.getLeftFilterConditionNodes() = {dag_filter_condition_node}; - left_join_actions->addOrReplaceInOutputs(*dag_filter_condition_node); + left_join_actions.addOrReplaceInOutputs(*dag_filter_condition_node); add_necessary_name_if_needed(JoinTableSide::Left, dag_filter_condition_node->result_name); } @@ -517,12 +517,12 @@ JoinClausesAndActions buildJoinClausesAndActions( const ActionsDAG::Node * dag_filter_condition_node = nullptr; if (right_filter_condition_nodes.size() > 1) - dag_filter_condition_node = &right_join_actions->addFunction(and_function, right_filter_condition_nodes, {}); + dag_filter_condition_node = &right_join_actions.addFunction(and_function, right_filter_condition_nodes, {}); else dag_filter_condition_node = right_filter_condition_nodes[0]; join_clause.getRightFilterConditionNodes() = {dag_filter_condition_node}; - right_join_actions->addOrReplaceInOutputs(*dag_filter_condition_node); + right_join_actions.addOrReplaceInOutputs(*dag_filter_condition_node); add_necessary_name_if_needed(JoinTableSide::Right, dag_filter_condition_node->result_name); } @@ -559,10 +559,10 @@ JoinClausesAndActions buildJoinClausesAndActions( } if (!left_key_node->result_type->equals(*common_type)) - left_key_node = &left_join_actions->addCast(*left_key_node, common_type, {}); + left_key_node = &left_join_actions.addCast(*left_key_node, common_type, {}); if (!right_key_node->result_type->equals(*common_type)) - right_key_node = &right_join_actions->addCast(*right_key_node, common_type, {}); + right_key_node = &right_join_actions.addCast(*right_key_node, common_type, {}); } if (join_clause.isNullsafeCompareKey(i) && left_key_node->result_type->isNullable() && right_key_node->result_type->isNullable()) @@ -579,24 +579,24 @@ JoinClausesAndActions buildJoinClausesAndActions( * SELECT * FROM t1 JOIN t2 ON tuple(t1.a) == tuple(t2.b) */ auto wrap_nullsafe_function = FunctionFactory::instance().get("tuple", planner_context->getQueryContext()); - left_key_node = &left_join_actions->addFunction(wrap_nullsafe_function, {left_key_node}, {}); - right_key_node = &right_join_actions->addFunction(wrap_nullsafe_function, {right_key_node}, {}); + left_key_node = &left_join_actions.addFunction(wrap_nullsafe_function, {left_key_node}, {}); + right_key_node = &right_join_actions.addFunction(wrap_nullsafe_function, {right_key_node}, {}); } - left_join_actions->addOrReplaceInOutputs(*left_key_node); - right_join_actions->addOrReplaceInOutputs(*right_key_node); + left_join_actions.addOrReplaceInOutputs(*left_key_node); + right_join_actions.addOrReplaceInOutputs(*right_key_node); add_necessary_name_if_needed(JoinTableSide::Left, left_key_node->result_name); add_necessary_name_if_needed(JoinTableSide::Right, right_key_node->result_name); } } - result.left_join_expressions_actions = left_join_actions->clone(); + result.left_join_expressions_actions = left_join_actions.clone(); result.left_join_tmp_expression_actions = std::move(left_join_actions); - result.left_join_expressions_actions->removeUnusedActions(join_left_actions_names); - result.right_join_expressions_actions = right_join_actions->clone(); + result.left_join_expressions_actions.removeUnusedActions(join_left_actions_names); + result.right_join_expressions_actions = right_join_actions.clone(); result.right_join_tmp_expression_actions = std::move(right_join_actions); - result.right_join_expressions_actions->removeUnusedActions(join_right_actions_names); + result.right_join_expressions_actions.removeUnusedActions(join_right_actions_names); if (is_inequal_join) { @@ -604,24 +604,24 @@ JoinClausesAndActions buildJoinClausesAndActions( /// So, for each column, we recalculate the value of the whole expression from JOIN ON to check if rows should be joined. if (result.join_clauses.size() > 1) { - auto mixed_join_expressions_actions = std::make_shared(mixed_table_expression_columns); + ActionsDAG mixed_join_expressions_actions(mixed_table_expression_columns); PlannerActionsVisitor join_expression_visitor(planner_context); - auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(*mixed_join_expressions_actions, join_expression); + auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(mixed_join_expressions_actions, join_expression); if (join_expression_dag_node_raw_pointers.size() != 1) throw Exception( ErrorCodes::LOGICAL_ERROR, "JOIN {} ON clause contains multiple expressions", join_node.formatASTForErrorMessage()); - mixed_join_expressions_actions->addOrReplaceInOutputs(*join_expression_dag_node_raw_pointers[0]); + mixed_join_expressions_actions.addOrReplaceInOutputs(*join_expression_dag_node_raw_pointers[0]); Names required_names{join_expression_dag_node_raw_pointers[0]->result_name}; - mixed_join_expressions_actions->removeUnusedActions(required_names); - result.mixed_join_expressions_actions = mixed_join_expressions_actions; + mixed_join_expressions_actions.removeUnusedActions(required_names); + result.mixed_join_expressions_actions = std::move(mixed_join_expressions_actions); } else { const auto & join_clause = result.join_clauses.front(); const auto & mixed_filter_condition_nodes = join_clause.getMixedFilterConditionNodes(); auto mixed_join_expressions_actions = ActionsDAG::buildFilterActionsDAG(mixed_filter_condition_nodes, {}, true); - result.mixed_join_expressions_actions = mixed_join_expressions_actions; + result.mixed_join_expressions_actions = std::move(mixed_join_expressions_actions); } auto outputs = result.mixed_join_expressions_actions->getOutputs(); if (outputs.size() != 1) diff --git a/src/Planner/PlannerJoins.h b/src/Planner/PlannerJoins.h index 8adf6edd7ea..d8665ab7739 100644 --- a/src/Planner/PlannerJoins.h +++ b/src/Planner/PlannerJoins.h @@ -182,15 +182,15 @@ struct JoinClausesAndActions /// Join clauses. Actions dag nodes point into join_expression_actions. JoinClauses join_clauses; /// Whole JOIN ON section expressions - ActionsDAGPtr left_join_tmp_expression_actions; - ActionsDAGPtr right_join_tmp_expression_actions; + ActionsDAG left_join_tmp_expression_actions; + ActionsDAG right_join_tmp_expression_actions; /// Left join expressions actions - ActionsDAGPtr left_join_expressions_actions; + ActionsDAG left_join_expressions_actions; /// Right join expressions actions - ActionsDAGPtr right_join_expressions_actions; + ActionsDAG right_join_expressions_actions; /// Originally used for inequal join. it's the total join expression. /// If there is no inequal join conditions, it's null. - ActionsDAGPtr mixed_join_expressions_actions; + std::optional mixed_join_expressions_actions; }; /** Calculate join clauses and actions for JOIN ON section. diff --git a/src/Planner/TableExpressionData.h b/src/Planner/TableExpressionData.h index 9723a00a356..72412a869e4 100644 --- a/src/Planner/TableExpressionData.h +++ b/src/Planner/TableExpressionData.h @@ -73,7 +73,7 @@ public: } /// Add alias column - void addAliasColumn(const NameAndTypePair & column, const ColumnIdentifier & column_identifier, ActionsDAGPtr actions_dag, bool is_selected_column = true) + void addAliasColumn(const NameAndTypePair & column, const ColumnIdentifier & column_identifier, ActionsDAG actions_dag, bool is_selected_column = true) { alias_column_expressions.emplace(column.name, std::move(actions_dag)); addColumnImpl(column, column_identifier, is_selected_column); @@ -94,7 +94,7 @@ public: } /// Get ALIAS columns names mapped to expressions - const std::unordered_map & getAliasColumnExpressions() const + std::unordered_map & getAliasColumnExpressions() { return alias_column_expressions; } @@ -211,32 +211,32 @@ public: is_merge_tree = is_merge_tree_value; } - const ActionsDAGPtr & getPrewhereFilterActions() const + const std::optional & getPrewhereFilterActions() const { return prewhere_filter_actions; } - void setRowLevelFilterActions(ActionsDAGPtr row_level_filter_actions_value) + void setRowLevelFilterActions(ActionsDAG row_level_filter_actions_value) { row_level_filter_actions = std::move(row_level_filter_actions_value); } - const ActionsDAGPtr & getRowLevelFilterActions() const + const std::optional & getRowLevelFilterActions() const { return row_level_filter_actions; } - void setPrewhereFilterActions(ActionsDAGPtr prewhere_filter_actions_value) + void setPrewhereFilterActions(ActionsDAG prewhere_filter_actions_value) { prewhere_filter_actions = std::move(prewhere_filter_actions_value); } - const ActionsDAGPtr & getFilterActions() const + const std::optional & getFilterActions() const { return filter_actions; } - void setFilterActions(ActionsDAGPtr filter_actions_value) + void setFilterActions(ActionsDAG filter_actions_value) { filter_actions = std::move(filter_actions_value); } @@ -277,7 +277,7 @@ private: NameSet selected_column_names_set; /// Expression to calculate ALIAS columns - std::unordered_map alias_column_expressions; + std::unordered_map alias_column_expressions; /// Valid for table, table function, array join, query, union nodes ColumnNameToColumn column_name_to_column; @@ -289,16 +289,16 @@ private: ColumnIdentifierToColumnName column_identifier_to_column_name; /// Valid for table, table function - ActionsDAGPtr filter_actions; + std::optional filter_actions; /// Valid for table, table function PrewhereInfoPtr prewhere_info; /// Valid for table, table function - ActionsDAGPtr prewhere_filter_actions; + std::optional prewhere_filter_actions; /// Valid for table, table function - ActionsDAGPtr row_level_filter_actions; + std::optional row_level_filter_actions; /// Is storage remote bool is_remote = false; diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 3c54c57a28c..a6e94a124e6 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -11,10 +11,12 @@ #include #include +#include #include #include +#include #include @@ -442,22 +444,22 @@ FilterDAGInfo buildFilterInfo(QueryTreeNodePtr filter_query_tree, collectSourceColumns(filter_query_tree, planner_context, false /*keep_alias_columns*/); collectSets(filter_query_tree, *planner_context); - auto filter_actions_dag = std::make_shared(); + ActionsDAG filter_actions_dag; PlannerActionsVisitor actions_visitor(planner_context, false /*use_column_identifier_as_action_node_name*/); - auto expression_nodes = actions_visitor.visit(*filter_actions_dag, filter_query_tree); + auto expression_nodes = actions_visitor.visit(filter_actions_dag, filter_query_tree); if (expression_nodes.size() != 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filter actions must return single output node. Actual {}", expression_nodes.size()); - auto & filter_actions_outputs = filter_actions_dag->getOutputs(); + auto & filter_actions_outputs = filter_actions_dag.getOutputs(); filter_actions_outputs = std::move(expression_nodes); std::string filter_node_name = filter_actions_outputs[0]->result_name; bool remove_filter_column = true; - for (const auto & filter_input_node : filter_actions_dag->getInputs()) + for (const auto & filter_input_node : filter_actions_dag.getInputs()) if (table_expression_required_names_without_filter.contains(filter_input_node->result_name)) filter_actions_outputs.push_back(filter_input_node); @@ -477,4 +479,32 @@ ASTPtr parseAdditionalResultFilter(const Settings & settings) return additional_result_filter_ast; } +void appendSetsFromActionsDAG(const ActionsDAG & dag, UsefulSets & useful_sets) +{ + for (const auto & node : dag.getNodes()) + { + if (node.column) + { + const IColumn * column = node.column.get(); + if (const auto * column_const = typeid_cast(column)) + column = &column_const->getDataColumn(); + + if (const auto * column_set = typeid_cast(column)) + useful_sets.insert(column_set->getData()); + } + + if (node.type == ActionsDAG::ActionType::FUNCTION && node.function_base->getName() == "indexHint") + { + ActionsDAG::NodeRawConstPtrs children; + if (const auto * adaptor = typeid_cast(node.function_base.get())) + { + if (const auto * index_hint = typeid_cast(adaptor->getFunction().get())) + { + appendSetsFromActionsDAG(index_hint->getActions(), useful_sets); + } + } + } + } +} + } diff --git a/src/Planner/Utils.h b/src/Planner/Utils.h index 3172847f053..ae60976a8d6 100644 --- a/src/Planner/Utils.h +++ b/src/Planner/Utils.h @@ -88,4 +88,7 @@ FilterDAGInfo buildFilterInfo(QueryTreeNodePtr filter_query_tree, ASTPtr parseAdditionalResultFilter(const Settings & settings); +using UsefulSets = std::unordered_set; +void appendSetsFromActionsDAG(const ActionsDAG & dag, UsefulSets & useful_sets); + } diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index be4e9430c34..0e3972e8c45 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -46,7 +46,7 @@ JSONEachRowRowInputFormat::JSONEachRowRowInputFormat( { const auto & header = getPort().getHeader(); name_map = header.getNamesToIndexesMap(); - if (format_settings_.json.ignore_key_case) + if (format_settings_.json.case_insensitive_column_matching) { for (auto & it : name_map) { @@ -181,7 +181,7 @@ void JSONEachRowRowInputFormat::readJSONObject(MutableColumns & columns) continue; } size_t column_index = 0; - if (format_settings.json.ignore_key_case) + if (format_settings.json.case_insensitive_column_matching) { String lower_case_name = transformFieldNameToLowerCase(name_ref); StringRef field_name_ref = lower_case_name_map[lower_case_name]; diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index e837d4d5e20..bc5e8292192 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -420,7 +420,7 @@ void ParquetBlockInputFormat::initializeIfNeeded() int num_row_groups = metadata->num_row_groups(); row_group_batches.reserve(num_row_groups); - auto adative_chunk_size = [&](int row_group_idx) -> size_t + auto adaptive_chunk_size = [&](int row_group_idx) -> size_t { size_t total_size = 0; auto row_group_meta = metadata->RowGroup(row_group_idx); @@ -457,7 +457,7 @@ void ParquetBlockInputFormat::initializeIfNeeded() row_group_batches.back().row_groups_idxs.push_back(row_group); row_group_batches.back().total_rows += metadata->RowGroup(row_group)->num_rows(); row_group_batches.back().total_bytes_compressed += metadata->RowGroup(row_group)->total_compressed_size(); - auto rows = adative_chunk_size(row_group); + auto rows = adaptive_chunk_size(row_group); row_group_batches.back().adaptive_chunk_size = rows ? rows : format_settings.parquet.max_block_size; } } diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index c23d717d52f..157b61aa2af 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -625,8 +625,6 @@ void ValuesBlockInputFormat::readSuffix() skipWhitespaceIfAny(*buf); if (buf->hasUnreadData()) throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read data after semicolon"); - if (!format_settings.values.allow_data_after_semicolon && !buf->eof()) - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read data after semicolon (and input_format_values_allow_data_after_semicolon=0)"); return; } diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 0d7e05af1de..f31de80b22d 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -303,15 +303,15 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B const auto & header = ports[set_counter]->getHeader(); /// Here we create a DAG which fills missing keys and adds `__grouping_set` column - auto dag = std::make_shared(header.getColumnsWithTypeAndName()); + ActionsDAG dag(header.getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs outputs; outputs.reserve(output_header.columns() + 1); auto grouping_col = ColumnConst::create(ColumnUInt64::create(1, set_counter), 0); - const auto * grouping_node = &dag->addColumn( + const auto * grouping_node = &dag.addColumn( {ColumnPtr(std::move(grouping_col)), std::make_shared(), "__grouping_set"}); - grouping_node = &dag->materializeNode(*grouping_node); + grouping_node = &dag.materializeNode(*grouping_node); outputs.push_back(grouping_node); const auto & missing_columns = grouping_sets_params[set_counter].missing_keys; @@ -332,22 +332,22 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B column_with_default->finalize(); auto column = ColumnConst::create(std::move(column_with_default), 0); - const auto * node = &dag->addColumn({ColumnPtr(std::move(column)), col.type, col.name}); - node = &dag->materializeNode(*node); + const auto * node = &dag.addColumn({ColumnPtr(std::move(column)), col.type, col.name}); + node = &dag.materializeNode(*node); outputs.push_back(node); } else { - const auto * column_node = dag->getOutputs()[header.getPositionByName(col.name)]; + const auto * column_node = dag.getOutputs()[header.getPositionByName(col.name)]; if (used_it != used_keys.end() && group_by_use_nulls && column_node->result_type->canBeInsideNullable()) - outputs.push_back(&dag->addFunction(to_nullable_function, { column_node }, col.name)); + outputs.push_back(&dag.addFunction(to_nullable_function, { column_node }, col.name)); else outputs.push_back(column_node); } } - dag->getOutputs().swap(outputs); - auto expression = std::make_shared(dag, settings.getActionsSettings()); + dag.getOutputs().swap(outputs); + auto expression = std::make_shared(std::move(dag), settings.getActionsSettings()); auto transform = std::make_shared(header, expression); connect(*ports[set_counter], transform->getInputPort()); diff --git a/src/Processors/QueryPlan/CubeStep.cpp b/src/Processors/QueryPlan/CubeStep.cpp index d010a3327a6..3a98f8e4612 100644 --- a/src/Processors/QueryPlan/CubeStep.cpp +++ b/src/Processors/QueryPlan/CubeStep.cpp @@ -36,30 +36,30 @@ CubeStep::CubeStep(const DataStream & input_stream_, Aggregator::Params params_, ProcessorPtr addGroupingSetForTotals(const Block & header, const Names & keys, bool use_nulls, const BuildQueryPipelineSettings & settings, UInt64 grouping_set_number) { - auto dag = std::make_shared(header.getColumnsWithTypeAndName()); - auto & outputs = dag->getOutputs(); + ActionsDAG dag(header.getColumnsWithTypeAndName()); + auto & outputs = dag.getOutputs(); if (use_nulls) { auto to_nullable = FunctionFactory::instance().get("toNullable", nullptr); for (const auto & key : keys) { - const auto * node = dag->getOutputs()[header.getPositionByName(key)]; + const auto * node = dag.getOutputs()[header.getPositionByName(key)]; if (node->result_type->canBeInsideNullable()) { - dag->addOrReplaceInOutputs(dag->addFunction(to_nullable, { node }, node->result_name)); + dag.addOrReplaceInOutputs(dag.addFunction(to_nullable, { node }, node->result_name)); } } } auto grouping_col = ColumnUInt64::create(1, grouping_set_number); - const auto * grouping_node = &dag->addColumn( + const auto * grouping_node = &dag.addColumn( {ColumnPtr(std::move(grouping_col)), std::make_shared(), "__grouping_set"}); - grouping_node = &dag->materializeNode(*grouping_node); + grouping_node = &dag.materializeNode(*grouping_node); outputs.insert(outputs.begin(), grouping_node); - auto expression = std::make_shared(dag, settings.getActionsSettings()); + auto expression = std::make_shared(std::move(dag), settings.getActionsSettings()); return std::make_shared(header, expression); } diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index 14f58585d3d..d8624a1c99b 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -33,7 +33,7 @@ void addConvertingActions(QueryPlan & plan, const Block & header, bool has_missi }; auto convert_actions_dag = get_converting_dag(plan.getCurrentDataStream().header, header); - auto converting = std::make_unique(plan.getCurrentDataStream(), convert_actions_dag); + auto converting = std::make_unique(plan.getCurrentDataStream(), std::move(convert_actions_dag)); plan.addStep(std::move(converting)); } diff --git a/src/Processors/QueryPlan/ExpressionStep.cpp b/src/Processors/QueryPlan/ExpressionStep.cpp index 0ccb0c4492a..6f88c4527a4 100644 --- a/src/Processors/QueryPlan/ExpressionStep.cpp +++ b/src/Processors/QueryPlan/ExpressionStep.cpp @@ -10,33 +10,33 @@ namespace DB { -static ITransformingStep::Traits getTraits(const ActionsDAGPtr & actions, const Block & header, const SortDescription & sort_description) +static ITransformingStep::Traits getTraits(const ActionsDAG & actions, const Block & header, const SortDescription & sort_description) { return ITransformingStep::Traits { { .returns_single_stream = false, .preserves_number_of_streams = true, - .preserves_sorting = actions->isSortingPreserved(header, sort_description), + .preserves_sorting = actions.isSortingPreserved(header, sort_description), }, { - .preserves_number_of_rows = !actions->hasArrayJoin(), + .preserves_number_of_rows = !actions.hasArrayJoin(), } }; } -ExpressionStep::ExpressionStep(const DataStream & input_stream_, const ActionsDAGPtr & actions_dag_) +ExpressionStep::ExpressionStep(const DataStream & input_stream_, ActionsDAG actions_dag_) : ITransformingStep( input_stream_, - ExpressionTransform::transformHeader(input_stream_.header, *actions_dag_), + ExpressionTransform::transformHeader(input_stream_.header, actions_dag_), getTraits(actions_dag_, input_stream_.header, input_stream_.sort_description)) - , actions_dag(actions_dag_) + , actions_dag(std::move(actions_dag_)) { } void ExpressionStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { - auto expression = std::make_shared(actions_dag, settings.getActionsSettings()); + auto expression = std::make_shared(std::move(actions_dag), settings.getActionsSettings()); pipeline.addSimpleTransform([&](const Block & header) { @@ -49,7 +49,7 @@ void ExpressionStep::transformPipeline(QueryPipelineBuilder & pipeline, const Bu pipeline.getHeader().getColumnsWithTypeAndName(), output_stream->header.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); - auto convert_actions = std::make_shared(convert_actions_dag, settings.getActionsSettings()); + auto convert_actions = std::make_shared(std::move(convert_actions_dag), settings.getActionsSettings()); pipeline.addSimpleTransform([&](const Block & header) { @@ -61,20 +61,20 @@ void ExpressionStep::transformPipeline(QueryPipelineBuilder & pipeline, const Bu void ExpressionStep::describeActions(FormatSettings & settings) const { String prefix(settings.offset, settings.indent_char); - auto expression = std::make_shared(actions_dag); + auto expression = std::make_shared(actions_dag.clone()); expression->describeActions(settings.out, prefix); } void ExpressionStep::describeActions(JSONBuilder::JSONMap & map) const { - auto expression = std::make_shared(actions_dag); + auto expression = std::make_shared(actions_dag.clone()); map.add("Expression", expression->toTree()); } void ExpressionStep::updateOutputStream() { output_stream = createOutputStream( - input_streams.front(), ExpressionTransform::transformHeader(input_streams.front().header, *actions_dag), getDataStreamTraits()); + input_streams.front(), ExpressionTransform::transformHeader(input_streams.front().header, actions_dag), getDataStreamTraits()); if (!getDataStreamTraits().preserves_sorting) return; diff --git a/src/Processors/QueryPlan/ExpressionStep.h b/src/Processors/QueryPlan/ExpressionStep.h index 3eef14ac129..f2926318cbc 100644 --- a/src/Processors/QueryPlan/ExpressionStep.h +++ b/src/Processors/QueryPlan/ExpressionStep.h @@ -1,12 +1,10 @@ #pragma once #include +#include namespace DB { -class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; - class ExpressionTransform; class JoiningTransform; @@ -15,21 +13,22 @@ class ExpressionStep : public ITransformingStep { public: - explicit ExpressionStep(const DataStream & input_stream_, const ActionsDAGPtr & actions_dag_); + explicit ExpressionStep(const DataStream & input_stream_, ActionsDAG actions_dag_); String getName() const override { return "Expression"; } void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) override; void describeActions(FormatSettings & settings) const override; - const ActionsDAGPtr & getExpression() const { return actions_dag; } + ActionsDAG & getExpression() { return actions_dag; } + const ActionsDAG & getExpression() const { return actions_dag; } void describeActions(JSONBuilder::JSONMap & map) const override; private: void updateOutputStream() override; - ActionsDAGPtr actions_dag; + ActionsDAG actions_dag; }; } diff --git a/src/Processors/QueryPlan/FilterStep.cpp b/src/Processors/QueryPlan/FilterStep.cpp index 72934665b5c..0c6b71387b7 100644 --- a/src/Processors/QueryPlan/FilterStep.cpp +++ b/src/Processors/QueryPlan/FilterStep.cpp @@ -9,9 +9,9 @@ namespace DB { -static ITransformingStep::Traits getTraits(const ActionsDAGPtr & expression, const Block & header, const SortDescription & sort_description, bool remove_filter_column, const String & filter_column_name) +static ITransformingStep::Traits getTraits(const ActionsDAG & expression, const Block & header, const SortDescription & sort_description, bool remove_filter_column, const String & filter_column_name) { - bool preserves_sorting = expression->isSortingPreserved(header, sort_description, remove_filter_column ? filter_column_name : ""); + bool preserves_sorting = expression.isSortingPreserved(header, sort_description, remove_filter_column ? filter_column_name : ""); if (remove_filter_column) { preserves_sorting &= std::find_if( @@ -35,28 +35,27 @@ static ITransformingStep::Traits getTraits(const ActionsDAGPtr & expression, con FilterStep::FilterStep( const DataStream & input_stream_, - const ActionsDAGPtr & actions_dag_, + ActionsDAG actions_dag_, String filter_column_name_, bool remove_filter_column_) : ITransformingStep( input_stream_, FilterTransform::transformHeader( input_stream_.header, - actions_dag_.get(), + &actions_dag_, filter_column_name_, remove_filter_column_), getTraits(actions_dag_, input_stream_.header, input_stream_.sort_description, remove_filter_column_, filter_column_name_)) - , actions_dag(actions_dag_) + , actions_dag(std::move(actions_dag_)) , filter_column_name(std::move(filter_column_name_)) , remove_filter_column(remove_filter_column_) { - actions_dag = actions_dag->clone(); - actions_dag->removeAliasesForFilter(filter_column_name); + actions_dag.removeAliasesForFilter(filter_column_name); } void FilterStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { - auto expression = std::make_shared(actions_dag, settings.getActionsSettings()); + auto expression = std::make_shared(std::move(actions_dag), settings.getActionsSettings()); pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) { @@ -70,7 +69,7 @@ void FilterStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQ pipeline.getHeader().getColumnsWithTypeAndName(), output_stream->header.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); - auto convert_actions = std::make_shared(convert_actions_dag, settings.getActionsSettings()); + auto convert_actions = std::make_shared(std::move(convert_actions_dag), settings.getActionsSettings()); pipeline.addSimpleTransform([&](const Block & header) { @@ -88,7 +87,7 @@ void FilterStep::describeActions(FormatSettings & settings) const settings.out << " (removed)"; settings.out << '\n'; - auto expression = std::make_shared(actions_dag); + auto expression = std::make_shared(actions_dag.clone()); expression->describeActions(settings.out, prefix); } @@ -97,7 +96,7 @@ void FilterStep::describeActions(JSONBuilder::JSONMap & map) const map.add("Filter Column", filter_column_name); map.add("Removes Filter", remove_filter_column); - auto expression = std::make_shared(actions_dag); + auto expression = std::make_shared(actions_dag.clone()); map.add("Expression", expression->toTree()); } @@ -105,7 +104,7 @@ void FilterStep::updateOutputStream() { output_stream = createOutputStream( input_streams.front(), - FilterTransform::transformHeader(input_streams.front().header, actions_dag.get(), filter_column_name, remove_filter_column), + FilterTransform::transformHeader(input_streams.front().header, &actions_dag, filter_column_name, remove_filter_column), getDataStreamTraits()); if (!getDataStreamTraits().preserves_sorting) diff --git a/src/Processors/QueryPlan/FilterStep.h b/src/Processors/QueryPlan/FilterStep.h index 939d0900c86..b5a31bef5fc 100644 --- a/src/Processors/QueryPlan/FilterStep.h +++ b/src/Processors/QueryPlan/FilterStep.h @@ -1,19 +1,17 @@ #pragma once #include +#include namespace DB { -class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; - /// Implements WHERE, HAVING operations. See FilterTransform. class FilterStep : public ITransformingStep { public: FilterStep( const DataStream & input_stream_, - const ActionsDAGPtr & actions_dag_, + ActionsDAG actions_dag_, String filter_column_name_, bool remove_filter_column_); @@ -23,15 +21,15 @@ public: void describeActions(JSONBuilder::JSONMap & map) const override; void describeActions(FormatSettings & settings) const override; - const ActionsDAGPtr & getExpression() const { return actions_dag; } - ActionsDAGPtr & getExpression() { return actions_dag; } + const ActionsDAG & getExpression() const { return actions_dag; } + ActionsDAG & getExpression() { return actions_dag; } const String & getFilterColumnName() const { return filter_column_name; } bool removesFilterColumn() const { return remove_filter_column; } private: void updateOutputStream() override; - ActionsDAGPtr actions_dag; + ActionsDAG actions_dag; String filter_column_name; bool remove_filter_column; }; diff --git a/src/Processors/QueryPlan/Optimizations/convertOuterJoinToInnerJoin.cpp b/src/Processors/QueryPlan/Optimizations/convertOuterJoinToInnerJoin.cpp index d90f0e152e7..0c708599398 100644 --- a/src/Processors/QueryPlan/Optimizations/convertOuterJoinToInnerJoin.cpp +++ b/src/Processors/QueryPlan/Optimizations/convertOuterJoinToInnerJoin.cpp @@ -23,7 +23,10 @@ size_t tryConvertOuterJoinToInnerJoin(QueryPlan::Node * parent_node, QueryPlan:: return 0; const auto & table_join = join->getJoin()->getTableJoin(); - if (table_join.strictness() == JoinStrictness::Asof) + + /// Any JOIN issue https://github.com/ClickHouse/ClickHouse/issues/66447 + /// Anti JOIN issue https://github.com/ClickHouse/ClickHouse/issues/67156 + if (table_join.strictness() != JoinStrictness::All) return 0; /// TODO: Support join_use_nulls @@ -45,10 +48,10 @@ size_t tryConvertOuterJoinToInnerJoin(QueryPlan::Node * parent_node, QueryPlan:: bool right_stream_safe = true; if (check_left_stream) - left_stream_safe = filter_dag->isFilterAlwaysFalseForDefaultValueInputs(filter_column_name, left_stream_input_header); + left_stream_safe = filter_dag.isFilterAlwaysFalseForDefaultValueInputs(filter_column_name, left_stream_input_header); if (check_right_stream) - right_stream_safe = filter_dag->isFilterAlwaysFalseForDefaultValueInputs(filter_column_name, right_stream_input_header); + right_stream_safe = filter_dag.isFilterAlwaysFalseForDefaultValueInputs(filter_column_name, right_stream_input_header); if (!left_stream_safe || !right_stream_safe) return 0; diff --git a/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp index 0a3a4094a66..37e61a6c388 100644 --- a/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp @@ -10,27 +10,27 @@ namespace DB::QueryPlanOptimizations { /// build actions DAG from stack of steps -static ActionsDAGPtr buildActionsForPlanPath(std::vector & dag_stack) +static std::optional buildActionsForPlanPath(std::vector & dag_stack) { if (dag_stack.empty()) - return nullptr; + return {}; - ActionsDAGPtr path_actions = dag_stack.back()->clone(); + ActionsDAG path_actions = dag_stack.back()->clone(); dag_stack.pop_back(); while (!dag_stack.empty()) { - ActionsDAGPtr clone = dag_stack.back()->clone(); + ActionsDAG clone = dag_stack.back()->clone(); dag_stack.pop_back(); - path_actions->mergeInplace(std::move(*clone)); + path_actions.mergeInplace(std::move(clone)); } return path_actions; } static std::set -getOriginalDistinctColumns(const ColumnsWithTypeAndName & distinct_columns, std::vector & dag_stack) +getOriginalDistinctColumns(const ColumnsWithTypeAndName & distinct_columns, std::vector & dag_stack) { auto actions = buildActionsForPlanPath(dag_stack); - FindOriginalNodeForOutputName original_node_finder(actions); + FindOriginalNodeForOutputName original_node_finder(*actions); std::set original_distinct_columns; for (const auto & column : distinct_columns) { @@ -65,7 +65,7 @@ size_t tryDistinctReadInOrder(QueryPlan::Node * parent_node) /// (3) gather actions DAG to find original names for columns in distinct step later std::vector steps_to_update; QueryPlan::Node * node = parent_node; - std::vector dag_stack; + std::vector dag_stack; while (!node->children.empty()) { auto * step = dynamic_cast(node->step.get()); @@ -79,9 +79,9 @@ size_t tryDistinctReadInOrder(QueryPlan::Node * parent_node) steps_to_update.push_back(step); if (const auto * const expr = typeid_cast(step); expr) - dag_stack.push_back(expr->getExpression()); + dag_stack.push_back(&expr->getExpression()); else if (const auto * const filter = typeid_cast(step); filter) - dag_stack.push_back(filter->getExpression()); + dag_stack.push_back(&filter->getExpression()); node = node->children.front(); } diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 263598bdca7..73314f005b6 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -101,7 +101,7 @@ static NameSet findIdentifiersOfNode(const ActionsDAG::Node * node) return res; } -static ActionsDAGPtr splitFilter(QueryPlan::Node * parent_node, const Names & available_inputs, size_t child_idx = 0) +static std::optional splitFilter(QueryPlan::Node * parent_node, const Names & available_inputs, size_t child_idx = 0) { QueryPlan::Node * child_node = parent_node->children.front(); checkChildrenSize(child_node, child_idx + 1); @@ -110,16 +110,16 @@ static ActionsDAGPtr splitFilter(QueryPlan::Node * parent_node, const Names & av auto & child = child_node->step; auto * filter = assert_cast(parent.get()); - const auto & expression = filter->getExpression(); + auto & expression = filter->getExpression(); const auto & filter_column_name = filter->getFilterColumnName(); bool removes_filter = filter->removesFilterColumn(); const auto & all_inputs = child->getInputStreams()[child_idx].header.getColumnsWithTypeAndName(); - return expression->splitActionsForFilterPushDown(filter_column_name, removes_filter, available_inputs, all_inputs); + return expression.splitActionsForFilterPushDown(filter_column_name, removes_filter, available_inputs, all_inputs); } static size_t -addNewFilterStepOrThrow(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, const ActionsDAGPtr & split_filter, +addNewFilterStepOrThrow(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, ActionsDAG split_filter, bool can_remove_filter = true, size_t child_idx = 0, bool update_parent_filter = true) { QueryPlan::Node * child_node = parent_node->children.front(); @@ -129,14 +129,14 @@ addNewFilterStepOrThrow(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, auto & child = child_node->step; auto * filter = assert_cast(parent.get()); - const auto & expression = filter->getExpression(); + auto & expression = filter->getExpression(); const auto & filter_column_name = filter->getFilterColumnName(); - const auto * filter_node = expression->tryFindInOutputs(filter_column_name); + const auto * filter_node = expression.tryFindInOutputs(filter_column_name); if (update_parent_filter && !filter_node && !filter->removesFilterColumn()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Filter column {} was removed from ActionsDAG but it is needed in result. DAG:\n{}", - filter_column_name, expression->dumpDAG()); + filter_column_name, expression.dumpDAG()); /// Add new Filter step before Child. /// Expression/Filter -> Child -> Something @@ -147,10 +147,10 @@ addNewFilterStepOrThrow(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, /// Expression/Filter -> Child -> Filter -> Something /// New filter column is the first one. - String split_filter_column_name = split_filter->getOutputs().front()->result_name; + String split_filter_column_name = split_filter.getOutputs().front()->result_name; node.step = std::make_unique( - node.children.at(0)->step->getOutputStream(), split_filter, std::move(split_filter_column_name), can_remove_filter); + node.children.at(0)->step->getOutputStream(), std::move(split_filter), std::move(split_filter_column_name), can_remove_filter); if (auto * transforming_step = dynamic_cast(child.get())) { @@ -176,7 +176,7 @@ addNewFilterStepOrThrow(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, { /// This means that all predicates of filter were pushed down. /// Replace current actions to expression, as we don't need to filter anything. - parent = std::make_unique(child->getOutputStream(), expression); + parent = std::make_unique(child->getOutputStream(), std::move(expression)); } else { @@ -192,7 +192,7 @@ tryAddNewFilterStep(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, con bool can_remove_filter = true, size_t child_idx = 0) { if (auto split_filter = splitFilter(parent_node, allowed_inputs, child_idx)) - return addNewFilterStepOrThrow(parent_node, nodes, split_filter, can_remove_filter, child_idx); + return addNewFilterStepOrThrow(parent_node, nodes, std::move(*split_filter), can_remove_filter, child_idx); return 0; } @@ -332,7 +332,7 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: Names left_stream_available_columns_to_push_down = get_available_columns_for_filter(true /*push_to_left_stream*/, left_stream_filter_push_down_input_columns_available); Names right_stream_available_columns_to_push_down = get_available_columns_for_filter(false /*push_to_left_stream*/, right_stream_filter_push_down_input_columns_available); - auto join_filter_push_down_actions = filter->getExpression()->splitActionsForJOINFilterPushDown(filter->getFilterColumnName(), + auto join_filter_push_down_actions = filter->getExpression().splitActionsForJOINFilterPushDown(filter->getFilterColumnName(), filter->removesFilterColumn(), left_stream_available_columns_to_push_down, left_stream_input_header, @@ -346,42 +346,44 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: if (join_filter_push_down_actions.left_stream_filter_to_push_down) { + const auto & result_name = join_filter_push_down_actions.left_stream_filter_to_push_down->getOutputs()[0]->result_name; updated_steps += addNewFilterStepOrThrow(parent_node, nodes, - join_filter_push_down_actions.left_stream_filter_to_push_down, + std::move(*join_filter_push_down_actions.left_stream_filter_to_push_down), join_filter_push_down_actions.left_stream_filter_removes_filter, 0 /*child_idx*/, false /*update_parent_filter*/); LOG_DEBUG(&Poco::Logger::get("QueryPlanOptimizations"), "Pushed down filter {} to the {} side of join", - join_filter_push_down_actions.left_stream_filter_to_push_down->getOutputs()[0]->result_name, + result_name, JoinKind::Left); } if (join_filter_push_down_actions.right_stream_filter_to_push_down && allow_push_down_to_right) { + const auto & result_name = join_filter_push_down_actions.right_stream_filter_to_push_down->getOutputs()[0]->result_name; updated_steps += addNewFilterStepOrThrow(parent_node, nodes, - join_filter_push_down_actions.right_stream_filter_to_push_down, + std::move(*join_filter_push_down_actions.right_stream_filter_to_push_down), join_filter_push_down_actions.right_stream_filter_removes_filter, 1 /*child_idx*/, false /*update_parent_filter*/); LOG_DEBUG(&Poco::Logger::get("QueryPlanOptimizations"), "Pushed down filter {} to the {} side of join", - join_filter_push_down_actions.right_stream_filter_to_push_down->getOutputs()[0]->result_name, + result_name, JoinKind::Right); } if (updated_steps > 0) { const auto & filter_column_name = filter->getFilterColumnName(); - const auto & filter_expression = filter->getExpression(); + auto & filter_expression = filter->getExpression(); - const auto * filter_node = filter_expression->tryFindInOutputs(filter_column_name); + const auto * filter_node = filter_expression.tryFindInOutputs(filter_column_name); if (!filter_node && !filter->removesFilterColumn()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Filter column {} was removed from ActionsDAG but it is needed in result. DAG:\n{}", - filter_column_name, filter_expression->dumpDAG()); + filter_column_name, filter_expression.dumpDAG()); /// Filter column was replaced to constant. @@ -391,7 +393,7 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: { /// This means that all predicates of filter were pushed down. /// Replace current actions to expression, as we don't need to filter anything. - parent = std::make_unique(child->getOutputStream(), filter_expression); + parent = std::make_unique(child->getOutputStream(), std::move(filter_expression)); } else { @@ -416,7 +418,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (!filter) return 0; - if (filter->getExpression()->hasStatefulFunctions()) + if (filter->getExpression().hasStatefulFunctions()) return 0; if (auto * aggregating = typeid_cast(child.get())) @@ -430,7 +432,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes return 0; const auto & actions = filter->getExpression(); - const auto & filter_node = actions->findInOutputs(filter->getFilterColumnName()); + const auto & filter_node = actions.findInOutputs(filter->getFilterColumnName()); auto identifiers_in_predicate = findIdentifiersOfNode(&filter_node); @@ -597,7 +599,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes filter_node.step = std::make_unique( filter_node.children.front()->step->getOutputStream(), - filter->getExpression()->clone(), + filter->getExpression().clone(), filter->getFilterColumnName(), filter->removesFilterColumn()); } @@ -611,7 +613,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (auto * read_from_merge = typeid_cast(child.get())) { - FilterDAGInfo info{filter->getExpression(), filter->getFilterColumnName(), filter->removesFilterColumn()}; + FilterDAGInfo info{filter->getExpression().clone(), filter->getFilterColumnName(), filter->removesFilterColumn()}; read_from_merge->addFilter(std::move(info)); std::swap(*parent_node, *child_node); return 1; diff --git a/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp b/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp index 36aab41df49..0d4f2330119 100644 --- a/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp +++ b/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp @@ -28,10 +28,10 @@ size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & node const auto & expression = expression_step ? expression_step->getExpression() : filter_step->getExpression(); - auto split_actions = expression->splitActionsBeforeArrayJoin(array_join->columns); + auto split_actions = expression.splitActionsBeforeArrayJoin(array_join->columns); /// No actions can be moved before ARRAY JOIN. - if (split_actions.first->trivial()) + if (split_actions.first.trivial()) return 0; auto description = parent->getStepDescription(); @@ -49,9 +49,9 @@ size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & node array_join_step->updateInputStream(node.step->getOutputStream()); if (expression_step) - parent = std::make_unique(array_join_step->getOutputStream(), split_actions.second); + parent = std::make_unique(array_join_step->getOutputStream(), std::move(split_actions.second)); else - parent = std::make_unique(array_join_step->getOutputStream(), split_actions.second, + parent = std::make_unique(array_join_step->getOutputStream(), std::move(split_actions.second), filter_step->getFilterColumnName(), filter_step->removesFilterColumn()); parent->setStepDescription(description + " [split]"); diff --git a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp index b280e2d3cc6..7794ddae8fa 100644 --- a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp +++ b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp @@ -66,13 +66,13 @@ size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan: NameSet sort_columns; for (const auto & col : sorting_step->getSortDescription()) sort_columns.insert(col.column_name); - auto [needed_for_sorting, unneeded_for_sorting, _] = expression_step->getExpression()->splitActionsBySortingDescription(sort_columns); + auto [needed_for_sorting, unneeded_for_sorting, _] = expression_step->getExpression().splitActionsBySortingDescription(sort_columns); // No calculations can be postponed. - if (unneeded_for_sorting->trivial()) + if (unneeded_for_sorting.trivial()) return 0; - if (!areNodesConvertableToBlock(needed_for_sorting->getOutputs()) || !areNodesConvertableToBlock(unneeded_for_sorting->getInputs())) + if (!areNodesConvertableToBlock(needed_for_sorting.getOutputs()) || !areNodesConvertableToBlock(unneeded_for_sorting.getInputs())) return 0; // Sorting (parent_node) -> Expression (child_node) diff --git a/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp b/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp index 35d8b1a35e4..c48551732c9 100644 --- a/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp +++ b/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp @@ -49,7 +49,7 @@ size_t tryLiftUpUnion(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) expr_node.step = std::make_unique( expr_node.children.front()->step->getOutputStream(), - expression->getExpression()->clone()); + expression->getExpression().clone()); } /// - Expression - Something diff --git a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp index 118abdd701f..fb4cb90c4ca 100644 --- a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp +++ b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp @@ -37,18 +37,18 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) if (parent_expr && child_expr) { - const auto & child_actions = child_expr->getExpression(); - const auto & parent_actions = parent_expr->getExpression(); + auto & child_actions = child_expr->getExpression(); + auto & parent_actions = parent_expr->getExpression(); /// We cannot combine actions with arrayJoin and stateful function because we not always can reorder them. /// Example: select rowNumberInBlock() from (select arrayJoin([1, 2])) /// Such a query will return two zeroes if we combine actions together. - if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions()) + if (child_actions.hasArrayJoin() && parent_actions.hasStatefulFunctions()) return 0; - auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions)); + auto merged = ActionsDAG::merge(std::move(child_actions), std::move(parent_actions)); - auto expr = std::make_unique(child_expr->getInputStreams().front(), merged); + auto expr = std::make_unique(child_expr->getInputStreams().front(), std::move(merged)); expr->setStepDescription("(" + parent_expr->getStepDescription() + " + " + child_expr->getStepDescription() + ")"); parent_node->step = std::move(expr); @@ -57,16 +57,16 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) } else if (parent_filter && child_expr) { - const auto & child_actions = child_expr->getExpression(); - const auto & parent_actions = parent_filter->getExpression(); + auto & child_actions = child_expr->getExpression(); + auto & parent_actions = parent_filter->getExpression(); - if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions()) + if (child_actions.hasArrayJoin() && parent_actions.hasStatefulFunctions()) return 0; - auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions)); + auto merged = ActionsDAG::merge(std::move(child_actions), std::move(parent_actions)); auto filter = std::make_unique(child_expr->getInputStreams().front(), - merged, + std::move(merged), parent_filter->getFilterColumnName(), parent_filter->removesFilterColumn()); filter->setStepDescription("(" + parent_filter->getStepDescription() + " + " + child_expr->getStepDescription() + ")"); @@ -93,32 +93,31 @@ size_t tryMergeFilters(QueryPlan::Node * parent_node, QueryPlan::Nodes &) if (parent_filter && child_filter) { - const auto & child_actions = child_filter->getExpression(); - const auto & parent_actions = parent_filter->getExpression(); + auto & child_actions = child_filter->getExpression(); + auto & parent_actions = parent_filter->getExpression(); - if (child_actions->hasArrayJoin()) + if (child_actions.hasArrayJoin()) return 0; - auto actions = child_actions->clone(); - const auto & child_filter_node = actions->findInOutputs(child_filter->getFilterColumnName()); + const auto & child_filter_node = child_actions.findInOutputs(child_filter->getFilterColumnName()); if (child_filter->removesFilterColumn()) - removeFromOutputs(*actions, child_filter_node); + removeFromOutputs(child_actions, child_filter_node); - actions->mergeInplace(std::move(*parent_actions->clone())); + child_actions.mergeInplace(std::move(parent_actions)); - const auto & parent_filter_node = actions->findInOutputs(parent_filter->getFilterColumnName()); + const auto & parent_filter_node = child_actions.findInOutputs(parent_filter->getFilterColumnName()); if (parent_filter->removesFilterColumn()) - removeFromOutputs(*actions, parent_filter_node); + removeFromOutputs(child_actions, parent_filter_node); FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); - const auto & condition = actions->addFunction(func_builder_and, {&child_filter_node, &parent_filter_node}, {}); - auto & outputs = actions->getOutputs(); + const auto & condition = child_actions.addFunction(func_builder_and, {&child_filter_node, &parent_filter_node}, {}); + auto & outputs = child_actions.getOutputs(); outputs.insert(outputs.begin(), &condition); - actions->removeUnusedActions(false); + child_actions.removeUnusedActions(false); auto filter = std::make_unique(child_filter->getInputStreams().front(), - actions, + std::move(child_actions), condition.result_name, true); filter->setStepDescription("(" + parent_filter->getStepDescription() + " + " + child_filter->getStepDescription() + ")"); diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index 3cb7f8a5d83..dc73521210a 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -56,12 +56,12 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) return; const auto & storage_prewhere_info = source_step_with_filter->getPrewhereInfo(); - if (storage_prewhere_info && storage_prewhere_info->prewhere_actions) + if (storage_prewhere_info) return; /// TODO: We can also check for UnionStep, such as StorageBuffer and local distributed plans. QueryPlan::Node * filter_node = (stack.rbegin() + 1)->node; - const auto * filter_step = typeid_cast(filter_node->step.get()); + auto * filter_step = typeid_cast(filter_node->step.get()); if (!filter_step) return; @@ -85,10 +85,11 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) Names queried_columns = source_step_with_filter->requiredSourceColumns(); + const auto & source_filter_actions_dag = source_step_with_filter->getFilterActionsDAG(); MergeTreeWhereOptimizer where_optimizer{ std::move(column_compressed_sizes), storage_metadata, - storage.getConditionSelectivityEstimatorByPredicate(storage_snapshot, source_step_with_filter->getFilterActionsDAG(), context), + storage.getConditionSelectivityEstimatorByPredicate(storage_snapshot, source_filter_actions_dag ? &*source_filter_actions_dag : nullptr, context), queried_columns, storage.supportedPrewhereColumns(), getLogger("QueryPlanOptimizePrewhere")}; @@ -110,20 +111,20 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) prewhere_info->need_filter = true; prewhere_info->remove_prewhere_column = optimize_result.fully_moved_to_prewhere && filter_step->removesFilterColumn(); - auto filter_expression = filter_step->getExpression(); + auto filter_expression = std::move(filter_step->getExpression()); const auto & filter_column_name = filter_step->getFilterColumnName(); if (prewhere_info->remove_prewhere_column) { - removeFromOutput(*filter_expression, filter_column_name); - auto & outputs = filter_expression->getOutputs(); + removeFromOutput(filter_expression, filter_column_name); + auto & outputs = filter_expression.getOutputs(); size_t size = outputs.size(); outputs.insert(outputs.end(), optimize_result.prewhere_nodes.begin(), optimize_result.prewhere_nodes.end()); - filter_expression->removeUnusedActions(false); + filter_expression.removeUnusedActions(false); outputs.resize(size); } - auto split_result = filter_step->getExpression()->split(optimize_result.prewhere_nodes, true, true); + auto split_result = filter_expression.split(optimize_result.prewhere_nodes, true, true); /// This is the leak of abstraction. /// Splited actions may have inputs which are needed only for PREWHERE. @@ -139,15 +140,15 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) /// So, here we restore removed inputs for PREWHERE actions { std::unordered_set first_outputs( - split_result.first->getOutputs().begin(), split_result.first->getOutputs().end()); - for (const auto * input : split_result.first->getInputs()) + split_result.first.getOutputs().begin(), split_result.first.getOutputs().end()); + for (const auto * input : split_result.first.getInputs()) { if (!first_outputs.contains(input)) { - split_result.first->getOutputs().push_back(input); + split_result.first.getOutputs().push_back(input); /// Add column to second actions as input. /// Do not add it to result, so it would be removed. - split_result.second->addInput(input->result_name, input->result_type); + split_result.second.addInput(input->result_name, input->result_type); } } } @@ -164,16 +165,16 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) { prewhere_info->prewhere_column_name = conditions.front()->result_name; if (prewhere_info->remove_prewhere_column) - prewhere_info->prewhere_actions->getOutputs().push_back(conditions.front()); + prewhere_info->prewhere_actions.getOutputs().push_back(conditions.front()); } else { prewhere_info->remove_prewhere_column = true; FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); - const auto * node = &prewhere_info->prewhere_actions->addFunction(func_builder_and, std::move(conditions), {}); + const auto * node = &prewhere_info->prewhere_actions.addFunction(func_builder_and, std::move(conditions), {}); prewhere_info->prewhere_column_name = node->result_name; - prewhere_info->prewhere_actions->getOutputs().push_back(node); + prewhere_info->prewhere_actions.getOutputs().push_back(node); } source_step_with_filter->updatePrewhereInfo(prewhere_info); diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp index da4e104d18b..490b79fbf8d 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp @@ -18,16 +18,16 @@ void optimizePrimaryKeyConditionAndLimit(const Stack & stack) const auto & storage_prewhere_info = source_step_with_filter->getPrewhereInfo(); if (storage_prewhere_info) { - source_step_with_filter->addFilter(storage_prewhere_info->prewhere_actions, storage_prewhere_info->prewhere_column_name); + source_step_with_filter->addFilter(storage_prewhere_info->prewhere_actions.clone(), storage_prewhere_info->prewhere_column_name); if (storage_prewhere_info->row_level_filter) - source_step_with_filter->addFilter(storage_prewhere_info->row_level_filter, storage_prewhere_info->row_level_column_name); + source_step_with_filter->addFilter(storage_prewhere_info->row_level_filter->clone(), storage_prewhere_info->row_level_column_name); } for (auto iter = stack.rbegin() + 1; iter != stack.rend(); ++iter) { if (auto * filter_step = typeid_cast(iter->node->step.get())) { - source_step_with_filter->addFilter(filter_step->getExpression(), filter_step->getFilterColumnName()); + source_step_with_filter->addFilter(filter_step->getExpression().clone(), filter_step->getFilterColumnName()); } else if (auto * limit_step = typeid_cast(iter->node->step.get())) { diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index 28eb4da2e17..99df6da263f 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -171,17 +171,17 @@ static void appendFixedColumnsFromFilterExpression(const ActionsDAG::Node & filt } } -static void appendExpression(ActionsDAGPtr & dag, const ActionsDAGPtr & expression) +static void appendExpression(std::optional & dag, const ActionsDAG & expression) { if (dag) - dag->mergeInplace(std::move(*expression->clone())); + dag->mergeInplace(expression.clone()); else - dag = expression->clone(); + dag = expression.clone(); } /// This function builds a common DAG which is a merge of DAGs from Filter and Expression steps chain. /// Additionally, build a set of fixed columns. -void buildSortingDAG(QueryPlan::Node & node, ActionsDAGPtr & dag, FixedColumns & fixed_columns, size_t & limit) +void buildSortingDAG(QueryPlan::Node & node, std::optional & dag, FixedColumns & fixed_columns, size_t & limit) { IQueryPlanStep * step = node.step.get(); if (auto * reading = typeid_cast(step)) @@ -191,13 +191,11 @@ void buildSortingDAG(QueryPlan::Node & node, ActionsDAGPtr & dag, FixedColumns & /// Should ignore limit if there is filtering. limit = 0; - if (prewhere_info->prewhere_actions) - { - //std::cerr << "====== Adding prewhere " << std::endl; - appendExpression(dag, prewhere_info->prewhere_actions); - if (const auto * filter_expression = dag->tryFindInOutputs(prewhere_info->prewhere_column_name)) - appendFixedColumnsFromFilterExpression(*filter_expression, fixed_columns); - } + //std::cerr << "====== Adding prewhere " << std::endl; + appendExpression(dag, prewhere_info->prewhere_actions); + if (const auto * filter_expression = dag->tryFindInOutputs(prewhere_info->prewhere_column_name)) + appendFixedColumnsFromFilterExpression(*filter_expression, fixed_columns); + } return; } @@ -212,7 +210,7 @@ void buildSortingDAG(QueryPlan::Node & node, ActionsDAGPtr & dag, FixedColumns & const auto & actions = expression->getExpression(); /// Should ignore limit because arrayJoin() can reduce the number of rows in case of empty array. - if (actions->hasArrayJoin()) + if (actions.hasArrayJoin()) limit = 0; appendExpression(dag, actions); @@ -330,7 +328,7 @@ void enreachFixedColumns(const ActionsDAG & dag, FixedColumns & fixed_columns) InputOrderInfoPtr buildInputOrderInfo( const FixedColumns & fixed_columns, - const ActionsDAGPtr & dag, + const std::optional & dag, const SortDescription & description, const KeyDescription & sorting_key, size_t limit) @@ -507,7 +505,7 @@ struct AggregationInputOrder AggregationInputOrder buildInputOrderInfo( const FixedColumns & fixed_columns, - const ActionsDAGPtr & dag, + const std::optional & dag, const Names & group_by_keys, const ActionsDAG & sorting_key_dag, const Names & sorting_key_columns) @@ -693,7 +691,7 @@ AggregationInputOrder buildInputOrderInfo( InputOrderInfoPtr buildInputOrderInfo( const ReadFromMergeTree * reading, const FixedColumns & fixed_columns, - const ActionsDAGPtr & dag, + const std::optional & dag, const SortDescription & description, size_t limit) { @@ -709,7 +707,7 @@ InputOrderInfoPtr buildInputOrderInfo( InputOrderInfoPtr buildInputOrderInfo( ReadFromMerge * merge, const FixedColumns & fixed_columns, - const ActionsDAGPtr & dag, + const std::optional & dag, const SortDescription & description, size_t limit) { @@ -745,7 +743,7 @@ InputOrderInfoPtr buildInputOrderInfo( AggregationInputOrder buildInputOrderInfo( ReadFromMergeTree * reading, const FixedColumns & fixed_columns, - const ActionsDAGPtr & dag, + const std::optional & dag, const Names & group_by_keys) { const auto & sorting_key = reading->getStorageMetadata()->getSortingKey(); @@ -760,7 +758,7 @@ AggregationInputOrder buildInputOrderInfo( AggregationInputOrder buildInputOrderInfo( ReadFromMerge * merge, const FixedColumns & fixed_columns, - const ActionsDAGPtr & dag, + const std::optional & dag, const Names & group_by_keys) { const auto & tables = merge->getSelectedTables(); @@ -801,7 +799,7 @@ InputOrderInfoPtr buildInputOrderInfo(SortingStep & sorting, QueryPlan::Node & n const auto & description = sorting.getSortDescription(); size_t limit = sorting.getLimit(); - ActionsDAGPtr dag; + std::optional dag; FixedColumns fixed_columns; buildSortingDAG(node, dag, fixed_columns, limit); @@ -855,7 +853,7 @@ AggregationInputOrder buildInputOrderInfo(AggregatingStep & aggregating, QueryPl const auto & keys = aggregating.getParams().keys; size_t limit = 0; - ActionsDAGPtr dag; + std::optional dag; FixedColumns fixed_columns; buildSortingDAG(node, dag, fixed_columns, limit); @@ -1076,13 +1074,13 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node, for (const auto & actions_dag : window_desc.partition_by_actions) { order_by_elements_actions.emplace_back( - std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(context, CompileExpressions::yes))); + std::make_shared(actions_dag->clone(), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes))); } for (const auto & actions_dag : window_desc.order_by_actions) { order_by_elements_actions.emplace_back( - std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(context, CompileExpressions::yes))); + std::make_shared(actions_dag->clone(), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes))); } auto order_optimizer = std::make_shared( diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 9e2c2dcc96d..ad89cec5f79 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -43,7 +43,7 @@ static DAGIndex buildDAGIndex(const ActionsDAG & dag) /// Required analysis info from aggregate projection. struct AggregateProjectionInfo { - ActionsDAGPtr before_aggregation; + std::optional before_aggregation; Names keys; AggregateDescriptions aggregates; @@ -274,7 +274,7 @@ static void appendAggregateFunctions( } } -ActionsDAGPtr analyzeAggregateProjection( +std::optional analyzeAggregateProjection( const AggregateProjectionInfo & info, const QueryDAG & query, const DAGIndex & query_index, @@ -394,7 +394,7 @@ ActionsDAGPtr analyzeAggregateProjection( // LOG_TRACE(getLogger("optimizeUseProjections"), "Folding actions by projection"); auto proj_dag = query.dag->foldActionsByProjection(new_inputs, query_key_nodes); - appendAggregateFunctions(*proj_dag, aggregates, *matched_aggregates); + appendAggregateFunctions(proj_dag, aggregates, *matched_aggregates); return proj_dag; } @@ -406,7 +406,7 @@ struct AggregateProjectionCandidate : public ProjectionCandidate /// Actions which need to be applied to columns from projection /// in order to get all the columns required for aggregation. - ActionsDAGPtr dag; + ActionsDAG dag; }; struct MinMaxProjectionCandidate @@ -481,13 +481,13 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( if (auto proj_dag = analyzeAggregateProjection(info, dag, query_index, keys, aggregates)) { // LOG_TRACE(getLogger("optimizeUseProjections"), "Projection analyzed DAG {}", proj_dag->dumpDAG()); - AggregateProjectionCandidate candidate{.info = std::move(info), .dag = std::move(proj_dag)}; + AggregateProjectionCandidate candidate{.info = std::move(info), .dag = std::move(*proj_dag)}; // LOG_TRACE(getLogger("optimizeUseProjections"), "Projection sample block {}", sample_block.dumpStructure()); auto block = reading.getMergeTreeData().getMinMaxCountProjectionBlock( metadata, - candidate.dag->getRequiredColumnsNames(), - (dag.filter_node ? dag.dag : nullptr), + candidate.dag.getRequiredColumnsNames(), + (dag.filter_node ? &*dag.dag : nullptr), parts, max_added_blocks.get(), context); @@ -538,7 +538,7 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( if (auto proj_dag = analyzeAggregateProjection(info, dag, query_index, keys, aggregates)) { // LOG_TRACE(getLogger("optimizeUseProjections"), "Projection analyzed DAG {}", proj_dag->dumpDAG()); - AggregateProjectionCandidate candidate{.info = std::move(info), .dag = std::move(proj_dag)}; + AggregateProjectionCandidate candidate{.info = std::move(info), .dag = std::move(*proj_dag)}; candidate.projection = projection; candidates.real.emplace_back(std::move(candidate)); } @@ -666,7 +666,7 @@ std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, Qu /// Selecting best candidate. for (auto & candidate : candidates.real) { - auto required_column_names = candidate.dag->getRequiredColumnsNames(); + auto required_column_names = candidate.dag.getRequiredColumnsNames(); bool analyzed = analyzeProjectionCandidate( candidate, @@ -677,7 +677,7 @@ std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, Qu query_info, context, max_added_blocks, - candidate.dag); + &candidate.dag); if (!analyzed) continue; @@ -767,7 +767,7 @@ std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, Qu projection_reading = reader.readFromParts( /* parts = */ {}, /* alter_conversions = */ {}, - best_candidate->dag->getRequiredColumnsNames(), + best_candidate->dag.getRequiredColumnsNames(), proj_snapshot, projection_query_info, context, @@ -779,7 +779,7 @@ std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, Qu if (!projection_reading) { - auto header = proj_snapshot->getSampleBlockForColumns(best_candidate->dag->getRequiredColumnsNames()); + auto header = proj_snapshot->getSampleBlockForColumns(best_candidate->dag.getRequiredColumnsNames()); Pipe pipe(std::make_shared(std::move(header))); projection_reading = std::make_unique(std::move(pipe)); } @@ -810,17 +810,19 @@ std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, Qu if (best_candidate) { aggregate_projection_node = &nodes.emplace_back(); + if (candidates.has_filter) { + const auto & result_name = best_candidate->dag.getOutputs().front()->result_name; aggregate_projection_node->step = std::make_unique( projection_reading_node.step->getOutputStream(), - best_candidate->dag, - best_candidate->dag->getOutputs().front()->result_name, + std::move(best_candidate->dag), + result_name, true); } else aggregate_projection_node->step - = std::make_unique(projection_reading_node.step->getOutputStream(), best_candidate->dag); + = std::make_unique(projection_reading_node.step->getOutputStream(), std::move(best_candidate->dag)); aggregate_projection_node->children.push_back(&projection_reading_node); } diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index 291800c2c86..e9aee65675f 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -25,7 +25,7 @@ struct NormalProjectionCandidate : public ProjectionCandidate { }; -static ActionsDAGPtr makeMaterializingDAG(const Block & proj_header, const Block main_header) +static std::optional makeMaterializingDAG(const Block & proj_header, const Block main_header) { /// Materialize constants in case we don't have it in output header. /// This may happen e.g. if we have PREWHERE. @@ -33,7 +33,7 @@ static ActionsDAGPtr makeMaterializingDAG(const Block & proj_header, const Block size_t num_columns = main_header.columns(); /// This is a error; will have block structure mismatch later. if (proj_header.columns() != num_columns) - return nullptr; + return {}; std::vector const_positions; for (size_t i = 0; i < num_columns; ++i) @@ -47,17 +47,17 @@ static ActionsDAGPtr makeMaterializingDAG(const Block & proj_header, const Block } if (const_positions.empty()) - return nullptr; + return {}; - ActionsDAGPtr dag = std::make_unique(); - auto & outputs = dag->getOutputs(); + ActionsDAG dag; + auto & outputs = dag.getOutputs(); for (const auto & col : proj_header.getColumnsWithTypeAndName()) - outputs.push_back(&dag->addInput(col)); + outputs.push_back(&dag.addInput(col)); for (auto pos : const_positions) { auto & output = outputs[pos]; - output = &dag->materializeNode(*output); + output = &dag.materializeNode(*output); } return dag; @@ -174,7 +174,7 @@ std::optional optimizeUseNormalProjections(Stack & stack, QueryPlan::Nod query_info, context, max_added_blocks, - query.filter_node ? query.dag : nullptr); + query.filter_node ? &*query.dag : nullptr); if (!analyzed) continue; @@ -244,14 +244,14 @@ std::optional optimizeUseNormalProjections(Stack & stack, QueryPlan::Nod { expr_or_filter_node.step = std::make_unique( projection_reading_node.step->getOutputStream(), - query.dag, + std::move(*query.dag), query.filter_node->result_name, true); } else expr_or_filter_node.step = std::make_unique( projection_reading_node.step->getOutputStream(), - query.dag); + std::move(*query.dag)); expr_or_filter_node.children.push_back(&projection_reading_node); next_node = &expr_or_filter_node; @@ -269,7 +269,7 @@ std::optional optimizeUseNormalProjections(Stack & stack, QueryPlan::Nod if (auto materializing = makeMaterializingDAG(proj_stream->header, main_stream.header)) { - auto converting = std::make_unique(*proj_stream, materializing); + auto converting = std::make_unique(*proj_stream, std::move(*materializing)); proj_stream = &converting->getOutputStream(); auto & expr_node = nodes.emplace_back(); expr_node.step = std::move(converting); diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp index 4d60a1135a6..7414d479cc9 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp @@ -65,12 +65,12 @@ std::shared_ptr getMaxAddedBlocks(ReadFromMergeTree * rea return {}; } -void QueryDAG::appendExpression(const ActionsDAGPtr & expression) +void QueryDAG::appendExpression(const ActionsDAG & expression) { if (dag) - dag->mergeInplace(std::move(*expression->clone())); + dag->mergeInplace(expression.clone()); else - dag = expression->clone(); + dag = expression.clone(); } const ActionsDAG::Node * findInOutputs(ActionsDAG & dag, const std::string & name, bool remove) @@ -121,22 +121,19 @@ bool QueryDAG::buildImpl(QueryPlan::Node & node, ActionsDAG::NodeRawConstPtrs & { if (prewhere_info->row_level_filter) { - appendExpression(prewhere_info->row_level_filter); + appendExpression(*prewhere_info->row_level_filter); if (const auto * filter_expression = findInOutputs(*dag, prewhere_info->row_level_column_name, false)) filter_nodes.push_back(filter_expression); else return false; } - if (prewhere_info->prewhere_actions) - { - appendExpression(prewhere_info->prewhere_actions); - if (const auto * filter_expression - = findInOutputs(*dag, prewhere_info->prewhere_column_name, prewhere_info->remove_prewhere_column)) - filter_nodes.push_back(filter_expression); - else - return false; - } + appendExpression(prewhere_info->prewhere_actions); + if (const auto * filter_expression + = findInOutputs(*dag, prewhere_info->prewhere_column_name, prewhere_info->remove_prewhere_column)) + filter_nodes.push_back(filter_expression); + else + return false; } return true; } @@ -150,7 +147,7 @@ bool QueryDAG::buildImpl(QueryPlan::Node & node, ActionsDAG::NodeRawConstPtrs & if (auto * expression = typeid_cast(step)) { const auto & actions = expression->getExpression(); - if (actions->hasArrayJoin()) + if (actions.hasArrayJoin()) return false; appendExpression(actions); @@ -160,7 +157,7 @@ bool QueryDAG::buildImpl(QueryPlan::Node & node, ActionsDAG::NodeRawConstPtrs & if (auto * filter = typeid_cast(step)) { const auto & actions = filter->getExpression(); - if (actions->hasArrayJoin()) + if (actions.hasArrayJoin()) return false; appendExpression(actions); @@ -214,7 +211,7 @@ bool analyzeProjectionCandidate( const SelectQueryInfo & query_info, const ContextPtr & context, const std::shared_ptr & max_added_blocks, - const ActionsDAGPtr & dag) + const ActionsDAG * dag) { MergeTreeData::DataPartsVector projection_parts; MergeTreeData::DataPartsVector normal_parts; @@ -239,7 +236,8 @@ bool analyzeProjectionCandidate( auto projection_query_info = query_info; projection_query_info.prewhere_info = nullptr; - projection_query_info.filter_actions_dag = dag; + if (dag) + projection_query_info.filter_actions_dag = std::make_unique(dag->clone()); auto projection_result_ptr = reader.estimateNumMarksToRead( std::move(projection_parts), diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.h b/src/Processors/QueryPlan/Optimizations/projectionsCommon.h index e1e106b988e..ee0dfddc326 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.h +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.h @@ -25,14 +25,14 @@ std::shared_ptr getMaxAddedBlocks(ReadFromMergeTree * rea /// Additionally, for all the Filter steps, we collect filter conditions into filter_nodes. struct QueryDAG { - ActionsDAGPtr dag; + std::optional dag; const ActionsDAG::Node * filter_node = nullptr; bool build(QueryPlan::Node & node); private: bool buildImpl(QueryPlan::Node & node, ActionsDAG::NodeRawConstPtrs & filter_nodes); - void appendExpression(const ActionsDAGPtr & expression); + void appendExpression(const ActionsDAG & expression); }; struct ProjectionCandidate @@ -60,6 +60,6 @@ bool analyzeProjectionCandidate( const SelectQueryInfo & query_info, const ContextPtr & context, const std::shared_ptr & max_added_blocks, - const ActionsDAGPtr & dag); + const ActionsDAG * dag); } diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp index 51df25b35f4..7664822cc7e 100644 --- a/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp +++ b/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp @@ -43,10 +43,10 @@ namespace } } - void logActionsDAG(const String & prefix, const ActionsDAGPtr & actions) + void logActionsDAG(const String & prefix, const ActionsDAG & actions) { if constexpr (debug_logging_enabled) - LOG_DEBUG(getLogger("redundantDistinct"), "{} :\n{}", prefix, actions->dumpDAG()); + LOG_DEBUG(getLogger("redundantDistinct"), "{} :\n{}", prefix, actions.dumpDAG()); } using DistinctColumns = std::set; @@ -65,25 +65,25 @@ namespace } /// build actions DAG from stack of steps - ActionsDAGPtr buildActionsForPlanPath(std::vector & dag_stack) + std::optional buildActionsForPlanPath(std::vector & dag_stack) { if (dag_stack.empty()) - return nullptr; + return {}; - ActionsDAGPtr path_actions = dag_stack.back()->clone(); + ActionsDAG path_actions = dag_stack.back()->clone(); dag_stack.pop_back(); while (!dag_stack.empty()) { - ActionsDAGPtr clone = dag_stack.back()->clone(); + ActionsDAG clone = dag_stack.back()->clone(); logActionsDAG("DAG to merge", clone); dag_stack.pop_back(); - path_actions->mergeInplace(std::move(*clone)); + path_actions.mergeInplace(std::move(clone)); } return path_actions; } bool compareAggregationKeysWithDistinctColumns( - const Names & aggregation_keys, const DistinctColumns & distinct_columns, std::vector> actions_chain) + const Names & aggregation_keys, const DistinctColumns & distinct_columns, std::vector> actions_chain) { logDebug("aggregation_keys", aggregation_keys); logDebug("aggregation_keys size", aggregation_keys.size()); @@ -93,7 +93,8 @@ namespace std::set source_columns; for (auto & actions : actions_chain) { - FindOriginalNodeForOutputName original_node_finder(buildActionsForPlanPath(actions)); + auto tmp_actions = buildActionsForPlanPath(actions); + FindOriginalNodeForOutputName original_node_finder(*tmp_actions); for (const auto & column : current_columns) { logDebug("distinct column name", column); @@ -131,10 +132,10 @@ namespace return true; if (const auto * const expr = typeid_cast(step); expr) - return !expr->getExpression()->hasArrayJoin(); + return !expr->getExpression().hasArrayJoin(); if (const auto * const filter = typeid_cast(step); filter) - return !filter->getExpression()->hasArrayJoin(); + return !filter->getExpression().hasArrayJoin(); if (typeid_cast(step) || typeid_cast(step) || typeid_cast(step) || typeid_cast(step)) @@ -152,8 +153,8 @@ namespace const DistinctStep * distinct_step = typeid_cast(distinct_node->step.get()); chassert(distinct_step); - std::vector dag_stack; - std::vector> actions_chain; + std::vector dag_stack; + std::vector> actions_chain; const DistinctStep * inner_distinct_step = nullptr; const IQueryPlanStep * aggregation_before_distinct = nullptr; const QueryPlan::Node * node = distinct_node; @@ -182,9 +183,9 @@ namespace } if (const auto * const expr = typeid_cast(current_step); expr) - dag_stack.push_back(expr->getExpression()); + dag_stack.push_back(&expr->getExpression()); else if (const auto * const filter = typeid_cast(current_step); filter) - dag_stack.push_back(filter->getExpression()); + dag_stack.push_back(&filter->getExpression()); node = node->children.front(); if (inner_distinct_step = typeid_cast(node->step.get()); inner_distinct_step) @@ -222,7 +223,7 @@ namespace chassert(distinct_step); const auto distinct_columns = getDistinctColumns(distinct_step); - std::vector dag_stack; + std::vector dag_stack; const DistinctStep * inner_distinct_step = nullptr; const QueryPlan::Node * node = distinct_node; while (!node->children.empty()) @@ -235,9 +236,9 @@ namespace } if (const auto * const expr = typeid_cast(current_step); expr) - dag_stack.push_back(expr->getExpression()); + dag_stack.push_back(&expr->getExpression()); else if (const auto * const filter = typeid_cast(current_step); filter) - dag_stack.push_back(filter->getExpression()); + dag_stack.push_back(&filter->getExpression()); node = node->children.front(); inner_distinct_step = typeid_cast(node->step.get()); @@ -259,11 +260,11 @@ namespace if (distinct_columns.size() != inner_distinct_columns.size()) return false; - ActionsDAGPtr path_actions; + ActionsDAG path_actions; if (!dag_stack.empty()) { /// build actions DAG to find original column names - path_actions = buildActionsForPlanPath(dag_stack); + path_actions = std::move(*buildActionsForPlanPath(dag_stack)); logActionsDAG("distinct pass: merged DAG", path_actions); /// compare columns of two DISTINCTs diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp index 632eba6ab5f..7cac7bee6ec 100644 --- a/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp +++ b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp @@ -213,12 +213,12 @@ private: logStep("checking for stateful function", node); if (const auto * expr = typeid_cast(step); expr) { - if (expr->getExpression()->hasStatefulFunctions()) + if (expr->getExpression().hasStatefulFunctions()) return false; } else if (const auto * filter = typeid_cast(step); filter) { - if (filter->getExpression()->hasStatefulFunctions()) + if (filter->getExpression().hasStatefulFunctions()) return false; } else diff --git a/src/Processors/QueryPlan/Optimizations/splitFilter.cpp b/src/Processors/QueryPlan/Optimizations/splitFilter.cpp index 561ad7302c6..6aed57634b0 100644 --- a/src/Processors/QueryPlan/Optimizations/splitFilter.cpp +++ b/src/Processors/QueryPlan/Optimizations/splitFilter.cpp @@ -17,13 +17,13 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes) const std::string & filter_column_name = filter_step->getFilterColumnName(); /// Do not split if there are function like runningDifference. - if (expr->hasStatefulFunctions()) + if (expr.hasStatefulFunctions()) return 0; bool filter_name_clashs_with_input = false; if (filter_step->removesFilterColumn()) { - for (const auto * input : expr->getInputs()) + for (const auto * input : expr.getInputs()) { if (input->result_name == filter_column_name) { @@ -33,14 +33,14 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes) } } - auto split = expr->splitActionsForFilter(filter_column_name); + auto split = expr.splitActionsForFilter(filter_column_name); - if (split.second->trivial()) + if (split.second.trivial()) return 0; bool remove_filter = false; if (filter_step->removesFilterColumn()) - remove_filter = split.second->removeUnusedResult(filter_column_name); + remove_filter = split.second.removeUnusedResult(filter_column_name); auto description = filter_step->getStepDescription(); @@ -53,11 +53,11 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes) { split_filter_name = "__split_filter"; - for (auto & filter_output : split.first->getOutputs()) + for (auto & filter_output : split.first.getOutputs()) { if (filter_output->result_name == filter_column_name) { - filter_output = &split.first->addAlias(*filter_output, split_filter_name); + filter_output = &split.first.addAlias(*filter_output, split_filter_name); break; } } diff --git a/src/Processors/QueryPlan/Optimizations/useDataParallelAggregation.cpp b/src/Processors/QueryPlan/Optimizations/useDataParallelAggregation.cpp index 124cb735d5a..0eeaec9bde7 100644 --- a/src/Processors/QueryPlan/Optimizations/useDataParallelAggregation.cpp +++ b/src/Processors/QueryPlan/Optimizations/useDataParallelAggregation.cpp @@ -74,11 +74,11 @@ void removeInjectiveFunctionsFromResultsRecursively(const ActionsDAG::Node * nod /// Our objective is to replace injective function nodes in `actions` results with its children /// until only the irreducible subset of nodes remains. Against these set of nodes we will match partition key expression /// to determine if it maps all rows with the same value of group by key to the same partition. -NodeSet removeInjectiveFunctionsFromResultsRecursively(const ActionsDAGPtr & actions) +NodeSet removeInjectiveFunctionsFromResultsRecursively(const ActionsDAG & actions) { NodeSet irreducible; NodeSet visited; - for (const auto & node : actions->getOutputs()) + for (const auto & node : actions.getOutputs()) removeInjectiveFunctionsFromResultsRecursively(node, irreducible, visited); return irreducible; } @@ -146,19 +146,19 @@ bool allOutputsDependsOnlyOnAllowedNodes( /// 3. We match partition key actions with group by key actions to find col1', ..., coln' in partition key actions. /// 4. We check that partition key is indeed a deterministic function of col1', ..., coln'. bool isPartitionKeySuitsGroupByKey( - const ReadFromMergeTree & reading, const ActionsDAGPtr & group_by_actions, const AggregatingStep & aggregating) + const ReadFromMergeTree & reading, const ActionsDAG & group_by_actions, const AggregatingStep & aggregating) { if (aggregating.isGroupingSets()) return false; - if (group_by_actions->hasArrayJoin() || group_by_actions->hasStatefulFunctions() || group_by_actions->hasNonDeterministic()) + if (group_by_actions.hasArrayJoin() || group_by_actions.hasStatefulFunctions() || group_by_actions.hasNonDeterministic()) return false; /// We are interested only in calculations required to obtain group by keys (and not aggregate function arguments for example). - auto key_nodes = group_by_actions->findInOutpus(aggregating.getParams().keys); + auto key_nodes = group_by_actions.findInOutpus(aggregating.getParams().keys); auto group_by_key_actions = ActionsDAG::cloneSubDAG(key_nodes, /*remove_aliases=*/ true); - const auto & gb_key_required_columns = group_by_key_actions->getRequiredColumnsNames(); + const auto & gb_key_required_columns = group_by_key_actions.getRequiredColumnsNames(); const auto & partition_actions = reading.getStorageMetadata()->getPartitionKey().expression->getActionsDAG(); @@ -169,7 +169,7 @@ bool isPartitionKeySuitsGroupByKey( const auto irreducibe_nodes = removeInjectiveFunctionsFromResultsRecursively(group_by_key_actions); - const auto matches = matchTrees(group_by_key_actions->getOutputs(), partition_actions); + const auto matches = matchTrees(group_by_key_actions.getOutputs(), partition_actions); return allOutputsDependsOnlyOnAllowedNodes(partition_actions, irreducibe_nodes, matches); } diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index ed4b1906635..a12fce95b10 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -943,7 +943,7 @@ SplitPartsWithRangesByPrimaryKeyResult splitPartsWithRangesByPrimaryKey( auto syntax_result = TreeRewriter(context).analyze(filter_function, primary_key.expression->getRequiredColumnsWithTypes()); auto actions = ExpressionAnalyzer(filter_function, syntax_result, context).getActionsDAG(false); - reorderColumns(*actions, result.merging_pipes[i].getHeader(), filter_function->getColumnName()); + reorderColumns(actions, result.merging_pipes[i].getHeader(), filter_function->getColumnName()); ExpressionActionsPtr expression_actions = std::make_shared(std::move(actions)); auto description = fmt::format( "filter values in ({}, {}]", i ? ::toString(borders[i - 1]) : "-inf", i < borders.size() ? ::toString(borders[i]) : "+inf"); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 6e86d14f5cc..483876dd293 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -109,8 +109,7 @@ bool restorePrewhereInputs(PrewhereInfo & info, const NameSet & inputs) if (info.row_level_filter) added = added || restoreDAGInputs(*info.row_level_filter, inputs); - if (info.prewhere_actions) - added = added || restoreDAGInputs(*info.prewhere_actions, inputs); + added = added || restoreDAGInputs(info.prewhere_actions, inputs); return added; } @@ -175,7 +174,6 @@ static void updateSortDescriptionForOutputStream( Block original_header = output_stream.header.cloneEmpty(); if (prewhere_info) { - if (prewhere_info->prewhere_actions) { FindOriginalNodeForOutputName original_column_finder(prewhere_info->prewhere_actions); for (auto & column : original_header) @@ -188,7 +186,7 @@ static void updateSortDescriptionForOutputStream( if (prewhere_info->row_level_filter) { - FindOriginalNodeForOutputName original_column_finder(prewhere_info->row_level_filter); + FindOriginalNodeForOutputName original_column_finder(*prewhere_info->row_level_filter); for (auto & column : original_header) { const auto * original_node = original_column_finder.find(column.name); @@ -802,7 +800,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreams(RangesInDataParts && parts_ info.use_uncompressed_cache); }; - auto sorting_expr = std::make_shared(metadata_for_reading->getSortingKey().expression->getActionsDAG().clone()); + auto sorting_expr = metadata_for_reading->getSortingKey().expression; SplitPartsWithRangesByPrimaryKeyResult split_ranges_result = splitPartsWithRangesByPrimaryKey( metadata_for_reading->getPrimaryKey(), @@ -834,10 +832,10 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreams(RangesInDataParts && parts_ pipes[0].getHeader().getColumnsWithTypeAndName(), pipes[1].getHeader().getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); + auto converting_expr = std::make_shared(std::move(conversion_action)); pipes[0].addSimpleTransform( - [conversion_action](const Block & header) + [converting_expr](const Block & header) { - auto converting_expr = std::make_shared(conversion_action); return std::make_shared(header, converting_expr); }); return Pipe::unitePipes(std::move(pipes)); @@ -851,16 +849,16 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreams(RangesInDataParts && parts_ info.use_uncompressed_cache); } -static ActionsDAGPtr createProjection(const Block & header) +static ActionsDAG createProjection(const Block & header) { - return std::make_shared(header.getNamesAndTypesList()); + return ActionsDAG(header.getNamesAndTypesList()); } Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( RangesInDataParts && parts_with_ranges, size_t num_streams, const Names & column_names, - ActionsDAGPtr & out_projection, + std::optional & out_projection, const InputOrderInfoPtr & input_order_info) { const auto & settings = context->getSettingsRef(); @@ -943,6 +941,8 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( PoolSettings pool_settings { + .threads = num_streams, + .sum_marks = parts_with_ranges.getMarksCountAllParts(), .min_marks_for_concurrent_read = info.min_marks_for_concurrent_read, .preferred_block_size_bytes = settings.preferred_block_size_bytes, .use_uncompressed_cache = info.use_uncompressed_cache, @@ -1050,7 +1050,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( for (size_t j = 0; j < prefix_size; ++j) sort_description.emplace_back(sorting_columns[j], input_order_info->direction); - auto sorting_key_expr = std::make_shared(sorting_key_prefix_expr); + auto sorting_key_expr = std::make_shared(std::move(sorting_key_prefix_expr)); auto merge_streams = [&](Pipe & pipe) { @@ -1174,7 +1174,7 @@ bool ReadFromMergeTree::doNotMergePartsAcrossPartitionsFinal() const } Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( - RangesInDataParts && parts_with_ranges, size_t num_streams, const Names & origin_column_names, const Names & column_names, ActionsDAGPtr & out_projection) + RangesInDataParts && parts_with_ranges, size_t num_streams, const Names & origin_column_names, const Names & column_names, std::optional & out_projection) { const auto & settings = context->getSettingsRef(); const auto & data_settings = data.getSettings(); @@ -1215,7 +1215,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( /// we will store lonely parts with level > 0 to use parallel select on them. RangesInDataParts non_intersecting_parts_by_primary_key; - auto sorting_expr = std::make_shared(metadata_for_reading->getSortingKey().expression->getActionsDAG().clone()); + auto sorting_expr = metadata_for_reading->getSortingKey().expression; if (prewhere_info) { @@ -1336,7 +1336,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( if (!merging_pipes.empty() && !no_merging_pipes.empty()) { - out_projection = nullptr; /// We do projection here + out_projection = {}; /// We do projection here Pipes pipes; pipes.resize(2); pipes[0] = Pipe::unitePipes(std::move(merging_pipes)); @@ -1345,10 +1345,10 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( pipes[0].getHeader().getColumnsWithTypeAndName(), pipes[1].getHeader().getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); + auto converting_expr = std::make_shared(std::move(conversion_action)); pipes[0].addSimpleTransform( - [conversion_action](const Block & header) + [converting_expr](const Block & header) { - auto converting_expr = std::make_shared(conversion_action); return std::make_shared(header, converting_expr); }); return Pipe::unitePipes(std::move(pipes)); @@ -1382,7 +1382,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( static void buildIndexes( std::optional & indexes, - ActionsDAGPtr filter_actions_dag, + const ActionsDAG * filter_actions_dag, const MergeTreeData & data, const MergeTreeData::DataPartsVector & parts, const ContextPtr & context, @@ -1522,11 +1522,12 @@ void ReadFromMergeTree::applyFilters(ActionDAGNodes added_filter_nodes) /// (1) SourceStepWithFilter::filter_nodes, (2) query_info.filter_actions_dag. Make sure there are consistent. /// TODO: Get rid of filter_actions_dag in query_info after we move analysis of /// parallel replicas and unused shards into optimization, similar to projection analysis. - query_info.filter_actions_dag = filter_actions_dag; + if (filter_actions_dag) + query_info.filter_actions_dag = std::make_shared(filter_actions_dag->clone()); buildIndexes( indexes, - filter_actions_dag, + query_info.filter_actions_dag.get(), data, prepared_parts, context, @@ -1568,7 +1569,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( const Names & primary_key_column_names = primary_key.column_names; if (!indexes) - buildIndexes(indexes, query_info_.filter_actions_dag, data, parts, context_, query_info_, metadata_snapshot); + buildIndexes(indexes, query_info_.filter_actions_dag.get(), data, parts, context_, query_info_, metadata_snapshot); if (indexes->part_values && indexes->part_values->empty()) return std::make_shared(std::move(result)); @@ -1836,7 +1837,7 @@ bool ReadFromMergeTree::isQueryWithSampling() const } Pipe ReadFromMergeTree::spreadMarkRanges( - RangesInDataParts && parts_with_ranges, size_t num_streams, AnalysisResult & result, ActionsDAGPtr & result_projection) + RangesInDataParts && parts_with_ranges, size_t num_streams, AnalysisResult & result, std::optional & result_projection) { const bool final = isQueryWithFinal(); Names column_names_to_read = result.column_names_to_read; @@ -1897,7 +1898,7 @@ Pipe ReadFromMergeTree::spreadMarkRanges( } } -Pipe ReadFromMergeTree::groupStreamsByPartition(AnalysisResult & result, ActionsDAGPtr & result_projection) +Pipe ReadFromMergeTree::groupStreamsByPartition(AnalysisResult & result, std::optional & result_projection) { auto && parts_with_ranges = std::move(result.parts_with_ranges); @@ -1986,7 +1987,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons /// Projection, that needed to drop columns, which have appeared by execution /// of some extra expressions, and to allow execute the same expressions later. /// NOTE: It may lead to double computation of expressions. - ActionsDAGPtr result_projection; + std::optional result_projection; Pipe pipe = output_each_partition_through_separate_port ? groupStreamsByPartition(result, result_projection) @@ -2003,7 +2004,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons if (result.sampling.use_sampling) { - auto sampling_actions = std::make_shared(result.sampling.filter_expression); + auto sampling_actions = std::make_shared(result.sampling.filter_expression->clone()); pipe.addSimpleTransform([&](const Block & header) { return std::make_shared( @@ -2016,12 +2017,12 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons Block cur_header = pipe.getHeader(); - auto append_actions = [&result_projection](ActionsDAGPtr actions) + auto append_actions = [&result_projection](ActionsDAG actions) { if (!result_projection) result_projection = std::move(actions); else - result_projection = ActionsDAG::merge(std::move(*result_projection), std::move(*actions)); + result_projection = ActionsDAG::merge(std::move(*result_projection), std::move(actions)); }; if (result_projection) @@ -2041,7 +2042,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons if (result_projection) { - auto projection_actions = std::make_shared(result_projection); + auto projection_actions = std::make_shared(std::move(*result_projection)); pipe.addSimpleTransform([&](const Block & header) { return std::make_shared(header, projection_actions); @@ -2058,7 +2059,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons ActionsDAG::MatchColumnsMode::Name, true); - auto converting_dag_expr = std::make_shared(convert_actions_dag); + auto converting_dag_expr = std::make_shared(std::move(convert_actions_dag)); pipe.addSimpleTransform([&](const Block & header) { @@ -2128,7 +2129,6 @@ void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const prefix.push_back(format_settings.indent_char); prefix.push_back(format_settings.indent_char); - if (prewhere_info->prewhere_actions) { format_settings.out << prefix << "Prewhere filter" << '\n'; format_settings.out << prefix << "Prewhere filter column: " << prewhere_info->prewhere_column_name; @@ -2136,7 +2136,7 @@ void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const format_settings.out << " (removed)"; format_settings.out << '\n'; - auto expression = std::make_shared(prewhere_info->prewhere_actions); + auto expression = std::make_shared(prewhere_info->prewhere_actions.clone()); expression->describeActions(format_settings.out, prefix); } @@ -2145,7 +2145,7 @@ void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const format_settings.out << prefix << "Row level filter" << '\n'; format_settings.out << prefix << "Row level filter column: " << prewhere_info->row_level_column_name << '\n'; - auto expression = std::make_shared(prewhere_info->row_level_filter); + auto expression = std::make_shared(prewhere_info->row_level_filter->clone()); expression->describeActions(format_settings.out, prefix); } } @@ -2166,12 +2166,11 @@ void ReadFromMergeTree::describeActions(JSONBuilder::JSONMap & map) const std::unique_ptr prewhere_info_map = std::make_unique(); prewhere_info_map->add("Need filter", prewhere_info->need_filter); - if (prewhere_info->prewhere_actions) { std::unique_ptr prewhere_filter_map = std::make_unique(); prewhere_filter_map->add("Prewhere filter column", prewhere_info->prewhere_column_name); prewhere_filter_map->add("Prewhere filter remove filter column", prewhere_info->remove_prewhere_column); - auto expression = std::make_shared(prewhere_info->prewhere_actions); + auto expression = std::make_shared(prewhere_info->prewhere_actions.clone()); prewhere_filter_map->add("Prewhere filter expression", expression->toTree()); prewhere_info_map->add("Prewhere filter", std::move(prewhere_filter_map)); @@ -2181,7 +2180,7 @@ void ReadFromMergeTree::describeActions(JSONBuilder::JSONMap & map) const { std::unique_ptr row_level_filter_map = std::make_unique(); row_level_filter_map->add("Row level filter column", prewhere_info->row_level_column_name); - auto expression = std::make_shared(prewhere_info->row_level_filter); + auto expression = std::make_shared(prewhere_info->row_level_filter->clone()); row_level_filter_map->add("Row level filter expression", expression->toTree()); prewhere_info_map->add("Row level filter", std::move(row_level_filter_map)); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index caa8aa2e1bd..a12f53924c3 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -23,7 +23,7 @@ struct MergeTreeDataSelectSamplingData bool read_nothing = false; Float64 used_sample_factor = 1.0; std::shared_ptr filter_function; - ActionsDAGPtr filter_expression; + std::shared_ptr filter_expression; }; struct UsefulSkipIndexes @@ -243,9 +243,9 @@ private: Pipe readFromPoolParallelReplicas(RangesInDataParts parts_with_range, Names required_columns, PoolSettings pool_settings); Pipe readInOrder(RangesInDataParts parts_with_ranges, Names required_columns, PoolSettings pool_settings, ReadType read_type, UInt64 limit); - Pipe spreadMarkRanges(RangesInDataParts && parts_with_ranges, size_t num_streams, AnalysisResult & result, ActionsDAGPtr & result_projection); + Pipe spreadMarkRanges(RangesInDataParts && parts_with_ranges, size_t num_streams, AnalysisResult & result, std::optional & result_projection); - Pipe groupStreamsByPartition(AnalysisResult & result, ActionsDAGPtr & result_projection); + Pipe groupStreamsByPartition(AnalysisResult & result, std::optional & result_projection); Pipe spreadMarkRangesAmongStreams(RangesInDataParts && parts_with_ranges, size_t num_streams, const Names & column_names); @@ -253,13 +253,13 @@ private: RangesInDataParts && parts_with_ranges, size_t num_streams, const Names & column_names, - ActionsDAGPtr & out_projection, + std::optional & out_projection, const InputOrderInfoPtr & input_order_info); bool doNotMergePartsAcrossPartitionsFinal() const; Pipe spreadMarkRangesAmongStreamsFinal( - RangesInDataParts && parts, size_t num_streams, const Names & origin_column_names, const Names & column_names, ActionsDAGPtr & out_projection); + RangesInDataParts && parts, size_t num_streams, const Names & origin_column_names, const Names & column_names, std::optional & out_projection); ReadFromMergeTree::AnalysisResult getAnalysisResult() const; diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index e27515a62a4..cf11052cd59 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -21,7 +21,7 @@ #include #include #include - +#include #include namespace DB @@ -362,7 +362,6 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( ASTPtr query_ast_, ClusterPtr cluster_, const StorageID & storage_id_, - ParallelReplicasReadingCoordinatorPtr coordinator_, Block header_, QueryProcessingStage::Enum stage_, ContextMutablePtr context_, @@ -375,7 +374,6 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( , cluster(cluster_) , query_ast(query_ast_) , storage_id(storage_id_) - , coordinator(std::move(coordinator_)) , stage(std::move(stage_)) , context(context_) , throttler(throttler_) @@ -438,6 +436,9 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder shuffled_pool = shard.pool->getShuffledPools(current_settings, priority_func); } + coordinator + = std::make_shared(max_replicas_to_use, current_settings.parallel_replicas_mark_segment_size); + for (size_t i=0; i < max_replicas_to_use; ++i) { IConnections::ReplicaInfo replica_info diff --git a/src/Processors/QueryPlan/ReadFromRemote.h b/src/Processors/QueryPlan/ReadFromRemote.h index eb15269155a..1adb26b2915 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.h +++ b/src/Processors/QueryPlan/ReadFromRemote.h @@ -70,7 +70,6 @@ public: ASTPtr query_ast_, ClusterPtr cluster_, const StorageID & storage_id_, - ParallelReplicasReadingCoordinatorPtr coordinator_, Block header_, QueryProcessingStage::Enum stage_, ContextMutablePtr context_, diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index b070bbe739b..2080e29ceba 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -459,7 +459,7 @@ Pipe ReadFromSystemNumbersStep::makePipe() chassert(numbers_storage.step != UInt64{0}); /// Build rpn of query filters - KeyCondition condition(filter_actions_dag, context, column_names, key_expression); + KeyCondition condition(filter_actions_dag ? &*filter_actions_dag : nullptr, context, column_names, key_expression); if (condition.extractPlainRanges(ranges)) { @@ -596,12 +596,12 @@ Pipe ReadFromSystemNumbersStep::makePipe() numbers_storage.step, step_between_chunks); - if (numbers_storage.limit && i == 0) + if (end && i == 0) { - auto rows_appr = itemCountInRange(numbers_storage.offset, *numbers_storage.limit, numbers_storage.step); - if (limit > 0 && limit < rows_appr) - rows_appr = query_info_limit; - source->addTotalRowsApprox(rows_appr); + UInt64 rows_approx = itemCountInRange(numbers_storage.offset, *end, numbers_storage.step); + if (limit > 0 && limit < rows_approx) + rows_approx = query_info_limit; + source->addTotalRowsApprox(rows_approx); } pipe.addSource(std::move(source)); diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.cpp b/src/Processors/QueryPlan/SourceStepWithFilter.cpp index ad0940b90b9..3de9ae37db0 100644 --- a/src/Processors/QueryPlan/SourceStepWithFilter.cpp +++ b/src/Processors/QueryPlan/SourceStepWithFilter.cpp @@ -34,9 +34,8 @@ Block SourceStepWithFilter::applyPrewhereActions(Block block, const PrewhereInfo block.erase(prewhere_info->row_level_column_name); } - if (prewhere_info->prewhere_actions) { - block = prewhere_info->prewhere_actions->updateHeader(block); + block = prewhere_info->prewhere_actions.updateHeader(block); auto & prewhere_column = block.getByName(prewhere_info->prewhere_column_name); if (!prewhere_column.type->canBeUsedInBooleanContext()) @@ -102,7 +101,6 @@ void SourceStepWithFilter::describeActions(FormatSettings & format_settings) con prefix.push_back(format_settings.indent_char); prefix.push_back(format_settings.indent_char); - if (prewhere_info->prewhere_actions) { format_settings.out << prefix << "Prewhere filter" << '\n'; format_settings.out << prefix << "Prewhere filter column: " << prewhere_info->prewhere_column_name; @@ -110,7 +108,7 @@ void SourceStepWithFilter::describeActions(FormatSettings & format_settings) con format_settings.out << " (removed)"; format_settings.out << '\n'; - auto expression = std::make_shared(prewhere_info->prewhere_actions); + auto expression = std::make_shared(prewhere_info->prewhere_actions.clone()); expression->describeActions(format_settings.out, prefix); } @@ -119,7 +117,7 @@ void SourceStepWithFilter::describeActions(FormatSettings & format_settings) con format_settings.out << prefix << "Row level filter" << '\n'; format_settings.out << prefix << "Row level filter column: " << prewhere_info->row_level_column_name << '\n'; - auto expression = std::make_shared(prewhere_info->row_level_filter); + auto expression = std::make_shared(prewhere_info->row_level_filter->clone()); expression->describeActions(format_settings.out, prefix); } } @@ -132,12 +130,11 @@ void SourceStepWithFilter::describeActions(JSONBuilder::JSONMap & map) const std::unique_ptr prewhere_info_map = std::make_unique(); prewhere_info_map->add("Need filter", prewhere_info->need_filter); - if (prewhere_info->prewhere_actions) { std::unique_ptr prewhere_filter_map = std::make_unique(); prewhere_filter_map->add("Prewhere filter column", prewhere_info->prewhere_column_name); prewhere_filter_map->add("Prewhere filter remove filter column", prewhere_info->remove_prewhere_column); - auto expression = std::make_shared(prewhere_info->prewhere_actions); + auto expression = std::make_shared(prewhere_info->prewhere_actions.clone()); prewhere_filter_map->add("Prewhere filter expression", expression->toTree()); prewhere_info_map->add("Prewhere filter", std::move(prewhere_filter_map)); @@ -147,7 +144,7 @@ void SourceStepWithFilter::describeActions(JSONBuilder::JSONMap & map) const { std::unique_ptr row_level_filter_map = std::make_unique(); row_level_filter_map->add("Row level filter column", prewhere_info->row_level_column_name); - auto expression = std::make_shared(prewhere_info->row_level_filter); + auto expression = std::make_shared(prewhere_info->row_level_filter->clone()); row_level_filter_map->add("Row level filter expression", expression->toTree()); prewhere_info_map->add("Row level filter", std::move(row_level_filter_map)); diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.h b/src/Processors/QueryPlan/SourceStepWithFilter.h index ca4ea4f3704..6cea5fd7245 100644 --- a/src/Processors/QueryPlan/SourceStepWithFilter.h +++ b/src/Processors/QueryPlan/SourceStepWithFilter.h @@ -33,7 +33,8 @@ public: { } - const ActionsDAGPtr & getFilterActionsDAG() const { return filter_actions_dag; } + const std::optional & getFilterActionsDAG() const { return filter_actions_dag; } + std::optional detachFilterActionsDAG() { return std::move(filter_actions_dag); } const SelectQueryInfo & getQueryInfo() const { return query_info; } const PrewhereInfoPtr & getPrewhereInfo() const { return prewhere_info; } @@ -44,9 +45,9 @@ public: const Names & requiredSourceColumns() const { return required_source_columns; } - void addFilter(ActionsDAGPtr filter_dag, std::string column_name) + void addFilter(ActionsDAG filter_dag, std::string column_name) { - filter_nodes.nodes.push_back(&filter_dag->findInOutputs(column_name)); + filter_nodes.nodes.push_back(&filter_dag.findInOutputs(column_name)); filter_dags.push_back(std::move(filter_dag)); } @@ -59,7 +60,7 @@ public: void applyFilters() { applyFilters(std::move(filter_nodes)); - filter_dags = {}; + filter_dags.clear(); } virtual void applyFilters(ActionDAGNodes added_filter_nodes); @@ -80,12 +81,12 @@ protected: ContextPtr context; std::optional limit; - ActionsDAGPtr filter_actions_dag; + std::optional filter_actions_dag; private: /// Will be cleared after applyFilters() is called. ActionDAGNodes filter_nodes; - std::vector filter_dags; + std::vector filter_dags; }; } diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp index 10384f6296a..2554053064f 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.cpp +++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp @@ -29,7 +29,7 @@ TotalsHavingStep::TotalsHavingStep( const DataStream & input_stream_, const AggregateDescriptions & aggregates_, bool overflow_row_, - const ActionsDAGPtr & actions_dag_, + std::optional actions_dag_, const std::string & filter_column_, bool remove_filter_, TotalsMode totals_mode_, @@ -39,7 +39,7 @@ TotalsHavingStep::TotalsHavingStep( input_stream_, TotalsHavingTransform::transformHeader( input_stream_.header, - actions_dag_.get(), + actions_dag_ ? &*actions_dag_ : nullptr, filter_column_, remove_filter_, final_, @@ -47,7 +47,7 @@ TotalsHavingStep::TotalsHavingStep( getTraits(!filter_column_.empty())) , aggregates(aggregates_) , overflow_row(overflow_row_) - , actions_dag(actions_dag_) + , actions_dag(std::move(actions_dag_)) , filter_column_name(filter_column_) , remove_filter(remove_filter_) , totals_mode(totals_mode_) @@ -58,7 +58,7 @@ TotalsHavingStep::TotalsHavingStep( void TotalsHavingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { - auto expression_actions = actions_dag ? std::make_shared(actions_dag, settings.getActionsSettings()) : nullptr; + auto expression_actions = actions_dag ? std::make_shared(std::move(*actions_dag), settings.getActionsSettings()) : nullptr; auto totals_having = std::make_shared( pipeline.getHeader(), @@ -101,13 +101,16 @@ void TotalsHavingStep::describeActions(FormatSettings & settings) const if (actions_dag) { bool first = true; - auto expression = std::make_shared(actions_dag); - for (const auto & action : expression->getActions()) + if (actions_dag) { - settings.out << prefix << (first ? "Actions: " - : " "); - first = false; - settings.out << action.toString() << '\n'; + auto expression = std::make_shared(actions_dag->clone()); + for (const auto & action : expression->getActions()) + { + settings.out << prefix << (first ? "Actions: " + : " "); + first = false; + settings.out << action.toString() << '\n'; + } } } } @@ -118,8 +121,11 @@ void TotalsHavingStep::describeActions(JSONBuilder::JSONMap & map) const if (actions_dag) { map.add("Filter column", filter_column_name); - auto expression = std::make_shared(actions_dag); - map.add("Expression", expression->toTree()); + if (actions_dag) + { + auto expression = std::make_shared(actions_dag->clone()); + map.add("Expression", expression->toTree()); + } } } @@ -129,7 +135,7 @@ void TotalsHavingStep::updateOutputStream() input_streams.front(), TotalsHavingTransform::transformHeader( input_streams.front().header, - actions_dag.get(), + getActions(), filter_column_name, remove_filter, final, diff --git a/src/Processors/QueryPlan/TotalsHavingStep.h b/src/Processors/QueryPlan/TotalsHavingStep.h index a81bc7bb1a9..4b414d41c57 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.h +++ b/src/Processors/QueryPlan/TotalsHavingStep.h @@ -1,13 +1,11 @@ #pragma once #include #include +#include namespace DB { -class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; - enum class TotalsMode : uint8_t; /// Execute HAVING and calculate totals. See TotalsHavingTransform. @@ -18,7 +16,7 @@ public: const DataStream & input_stream_, const AggregateDescriptions & aggregates_, bool overflow_row_, - const ActionsDAGPtr & actions_dag_, + std::optional actions_dag_, const std::string & filter_column_, bool remove_filter_, TotalsMode totals_mode_, @@ -32,7 +30,7 @@ public: void describeActions(JSONBuilder::JSONMap & map) const override; void describeActions(FormatSettings & settings) const override; - const ActionsDAGPtr & getActions() const { return actions_dag; } + const ActionsDAG * getActions() const { return actions_dag ? &*actions_dag : nullptr; } private: void updateOutputStream() override; @@ -40,7 +38,7 @@ private: const AggregateDescriptions aggregates; bool overflow_row; - ActionsDAGPtr actions_dag; + std::optional actions_dag; String filter_column_name; bool remove_filter; TotalsMode totals_mode; diff --git a/src/Processors/QueryPlan/WindowStep.h b/src/Processors/QueryPlan/WindowStep.h index 74a0e5930c7..d79cd7fd45e 100644 --- a/src/Processors/QueryPlan/WindowStep.h +++ b/src/Processors/QueryPlan/WindowStep.h @@ -6,9 +6,6 @@ namespace DB { -class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; - class WindowTransform; class WindowStep : public ITransformingStep diff --git a/src/Processors/SourceWithKeyCondition.h b/src/Processors/SourceWithKeyCondition.h index ee155d6f78c..cfd3eb236b7 100644 --- a/src/Processors/SourceWithKeyCondition.h +++ b/src/Processors/SourceWithKeyCondition.h @@ -16,13 +16,13 @@ protected: /// Represents pushed down filters in source std::shared_ptr key_condition; - void setKeyConditionImpl(const ActionsDAGPtr & filter_actions_dag, ContextPtr context, const Block & keys) + void setKeyConditionImpl(const std::optional & filter_actions_dag, ContextPtr context, const Block & keys) { key_condition = std::make_shared( - filter_actions_dag, + filter_actions_dag ? &*filter_actions_dag : nullptr, context, keys.getNames(), - std::make_shared(std::make_shared(keys.getColumnsWithTypeAndName()))); + std::make_shared(ActionsDAG(keys.getColumnsWithTypeAndName()))); } public: @@ -33,6 +33,6 @@ public: virtual void setKeyCondition(const std::shared_ptr & key_condition_) { key_condition = key_condition_; } /// Set key_condition created by filter_actions_dag and context. - virtual void setKeyCondition(const ActionsDAGPtr & /*filter_actions_dag*/, ContextPtr /*context*/) { } + virtual void setKeyCondition(const std::optional & /*filter_actions_dag*/, ContextPtr /*context*/) { } }; } diff --git a/src/Processors/Transforms/AddingDefaultsTransform.cpp b/src/Processors/Transforms/AddingDefaultsTransform.cpp index 7945b3999c1..da4d3a0041b 100644 --- a/src/Processors/Transforms/AddingDefaultsTransform.cpp +++ b/src/Processors/Transforms/AddingDefaultsTransform.cpp @@ -178,7 +178,7 @@ void AddingDefaultsTransform::transform(Chunk & chunk) auto dag = evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), columns, context, false); if (dag) { - auto actions = std::make_shared(std::move(dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes), true); + auto actions = std::make_shared(std::move(*dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes), true); actions->execute(evaluate_block); } diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index bb38c3e1dc5..9601f821cc8 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -203,7 +203,7 @@ FillingTransform::FillingTransform( , use_with_fill_by_sorting_prefix(use_with_fill_by_sorting_prefix_) { if (interpolate_description) - interpolate_actions = std::make_shared(interpolate_description->actions); + interpolate_actions = std::make_shared(interpolate_description->actions.clone()); std::vector is_fill_column(header_.columns()); for (size_t i = 0, size = fill_description.size(); i < size; ++i) diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index fb3b2faa9c5..e96a75d277b 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -34,13 +34,20 @@ namespace ErrorCodes namespace { -FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns) +FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns, JoinStrictness strictness) { SortDescription desc; desc.reserve(columns.size()); for (const auto & name : columns) desc.emplace_back(name); - return std::make_unique(block, desc); + return std::make_unique(block, desc, strictness == JoinStrictness::Asof); +} + +bool ALWAYS_INLINE isNullAt(const IColumn & column, size_t row) +{ + if (const auto * nullable_column = checkAndGetColumn(&column)) + return nullable_column->isNullAt(row); + return false; } template @@ -54,7 +61,7 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column, if (left_nullable && right_nullable) { int res = left_nullable->compareAt(lhs_pos, rhs_pos, right_column, null_direction_hint); - if (res) + if (res != 0) return res; /// NULL != NULL case @@ -90,9 +97,10 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column, int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos, const SortCursorImpl & rhs, size_t rpos, + size_t key_length, int null_direction_hint) { - for (size_t i = 0; i < lhs.sort_columns_size; ++i) + for (size_t i = 0; i < key_length; ++i) { /// TODO(@vdimir): use nullableCompareAt only if there's nullable columns int cmp = nullableCompareAt(*lhs.sort_columns[i], *rhs.sort_columns[i], lpos, rpos, null_direction_hint); @@ -104,13 +112,18 @@ int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos, int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, const SortCursorImpl & rhs, int null_direction_hint) { - return compareCursors(lhs, lhs.getRow(), rhs, rhs.getRow(), null_direction_hint); + return compareCursors(lhs, lhs.getRow(), rhs, rhs.getRow(), lhs.sort_columns_size, null_direction_hint); +} + +int compareAsofCursors(const FullMergeJoinCursor & lhs, const FullMergeJoinCursor & rhs, int null_direction_hint) +{ + return nullableCompareAt(*lhs.getAsofColumn(), *rhs.getAsofColumn(), lhs->getRow(), rhs->getRow(), null_direction_hint); } bool ALWAYS_INLINE totallyLess(SortCursorImpl & lhs, SortCursorImpl & rhs, int null_direction_hint) { /// The last row of left cursor is less than the current row of the right cursor. - int cmp = compareCursors(lhs, lhs.rows - 1, rhs, rhs.getRow(), null_direction_hint); + int cmp = compareCursors(lhs, lhs.rows - 1, rhs, rhs.getRow(), lhs.sort_columns_size, null_direction_hint); return cmp < 0; } @@ -222,25 +235,136 @@ Chunk getRowFromChunk(const Chunk & chunk, size_t pos) return result; } -void inline addRange(PaddedPODArray & left_map, size_t start, size_t end) +void inline addRange(PaddedPODArray & values, UInt64 start, UInt64 end) { assert(end > start); - for (size_t i = start; i < end; ++i) - left_map.push_back(i); + for (UInt64 i = start; i < end; ++i) + values.push_back(i); } -void inline addMany(PaddedPODArray & left_or_right_map, size_t idx, size_t num) +void inline addMany(PaddedPODArray & values, UInt64 value, size_t num) { - for (size_t i = 0; i < num; ++i) - left_or_right_map.push_back(idx); + values.resize_fill(values.size() + num, value); } } -FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_) - : sample_block(materializeBlock(sample_block_).cloneEmpty()), desc(description_) +JoinKeyRow::JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos) { + row.reserve(cursor->sort_columns.size()); + for (const auto & col : cursor->sort_columns) + { + auto new_col = col->cloneEmpty(); + new_col->insertFrom(*col, pos); + row.push_back(std::move(new_col)); + } + if (const IColumn * asof_column = cursor.getAsofColumn()) + { + if (const auto * nullable_asof_column = checkAndGetColumn(asof_column)) + { + /// We save matched column, and since NULL do not match anything, we can't use it as a key + chassert(!nullable_asof_column->isNullAt(pos)); + asof_column = nullable_asof_column->getNestedColumnPtr().get(); + } + auto new_col = asof_column->cloneEmpty(); + new_col->insertFrom(*asof_column, pos); + row.push_back(std::move(new_col)); + } } +void JoinKeyRow::reset() +{ + row.clear(); +} + +bool JoinKeyRow::equals(const FullMergeJoinCursor & cursor) const +{ + if (row.empty()) + return false; + + for (size_t i = 0; i < cursor->sort_columns_size; ++i) + { + // int cmp = this->row[i]->compareAt(0, cursor->getRow(), *(cursor->sort_columns[i]), cursor->desc[i].nulls_direction); + int cmp = nullableCompareAt(*this->row[i], *cursor->sort_columns[i], 0, cursor->getRow(), cursor->desc[i].nulls_direction); + if (cmp != 0) + return false; + } + return true; +} + +bool JoinKeyRow::asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const +{ + chassert(this->row.size() == cursor->sort_columns_size + 1); + if (!equals(cursor)) + return false; + + const auto & asof_row = row.back(); + if (isNullAt(*asof_row, 0) || isNullAt(*cursor.getAsofColumn(), cursor->getRow())) + return false; + + int cmp = 0; + if (const auto * nullable_column = checkAndGetColumn(cursor.getAsofColumn())) + cmp = nullable_column->getNestedColumn().compareAt(cursor->getRow(), 0, *asof_row, 1); + else + cmp = cursor.getAsofColumn()->compareAt(cursor->getRow(), 0, *asof_row, 1); + + return (asof_inequality == ASOFJoinInequality::Less && cmp < 0) + || (asof_inequality == ASOFJoinInequality::LessOrEquals && cmp <= 0) + || (asof_inequality == ASOFJoinInequality::Greater && cmp > 0) + || (asof_inequality == ASOFJoinInequality::GreaterOrEquals && cmp >= 0); +} + +void AnyJoinState::set(size_t source_num, const FullMergeJoinCursor & cursor) +{ + assert(cursor->rows); + keys[source_num] = JoinKeyRow(cursor, cursor->rows - 1); +} + +void AnyJoinState::reset(size_t source_num) +{ + keys[source_num].reset(); + value.clear(); +} + +void AnyJoinState::setValue(Chunk value_) +{ + value = std::move(value_); +} + +bool AnyJoinState::empty() const { return keys[0].row.empty() && keys[1].row.empty(); } + + +void AsofJoinState::set(const FullMergeJoinCursor & rcursor, size_t rpos) +{ + key = JoinKeyRow(rcursor, rpos); + value = rcursor.getCurrent().clone(); + value_row = rpos; +} + +void AsofJoinState::reset() +{ + key.reset(); + value.clear(); +} + +FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_, bool is_asof) + : sample_block(materializeBlock(sample_block_).cloneEmpty()) + , desc(description_) +{ + if (desc.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty sort description for FullMergeJoinCursor"); + + if (is_asof) + { + /// For ASOF join prefix of sort description is used for equality comparison + /// and the last column is used for inequality comparison and is handled separately + + auto asof_column_description = desc.back(); + desc.pop_back(); + + chassert(asof_column_description.direction == 1 && asof_column_description.nulls_direction == 1); + asof_column_position = sample_block.getPositionByName(asof_column_description.column_name); + } +} const Chunk & FullMergeJoinCursor::getCurrent() const { @@ -278,48 +402,103 @@ bool FullMergeJoinCursor::fullyCompleted() const return !cursor.isValid() && recieved_all_blocks; } +String FullMergeJoinCursor::dump() const +{ + Strings row_dump; + if (cursor.isValid()) + { + Field val; + for (size_t i = 0; i < cursor.sort_columns_size; ++i) + { + cursor.sort_columns[i]->get(cursor.getRow(), val); + row_dump.push_back(val.dump()); + } + + if (const auto * asof_column = getAsofColumn()) + { + asof_column->get(cursor.getRow(), val); + row_dump.push_back(val.dump()); + } + } + + return fmt::format("<{}/{}{}>[{}]", + cursor.getRow(), cursor.rows, + recieved_all_blocks ? "(finished)" : "", + fmt::join(row_dump, ", ")); +} + MergeJoinAlgorithm::MergeJoinAlgorithm( - JoinPtr table_join_, + JoinKind kind_, + JoinStrictness strictness_, + const TableJoin::JoinOnClause & on_clause_, const Blocks & input_headers, size_t max_block_size_) - : table_join(table_join_) + : kind(kind_) + , strictness(strictness_) , max_block_size(max_block_size_) , log(getLogger("MergeJoinAlgorithm")) { if (input_headers.size() != 2) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeJoinAlgorithm requires exactly two inputs"); - auto strictness = table_join->getTableJoin().strictness(); - if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All) + if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All && strictness != JoinStrictness::Asof) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for strictness {}", strictness); - auto kind = table_join->getTableJoin().kind(); + if (strictness == JoinStrictness::Asof) + { + if (kind != JoinKind::Left && kind != JoinKind::Inner) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not implement ASOF {} join", kind); + } + if (!isInner(kind) && !isLeft(kind) && !isRight(kind) && !isFull(kind)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for kind {}", kind); - const auto & join_on = table_join->getTableJoin().getOnlyClause(); - - if (join_on.on_filter_condition_left || join_on.on_filter_condition_right) + if (on_clause_.on_filter_condition_left || on_clause_.on_filter_condition_right) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not support ON filter conditions"); cursors = { - createCursor(input_headers[0], join_on.key_names_left), - createCursor(input_headers[1], join_on.key_names_right) + createCursor(input_headers[0], on_clause_.key_names_left, strictness), + createCursor(input_headers[1], on_clause_.key_names_right, strictness), }; +} - for (const auto & [left_key, right_key] : table_join->getTableJoin().leftToRightKeyRemap()) +MergeJoinAlgorithm::MergeJoinAlgorithm( + JoinPtr join_ptr, + const Blocks & input_headers, + size_t max_block_size_) + : MergeJoinAlgorithm( + join_ptr->getTableJoin().kind(), + join_ptr->getTableJoin().strictness(), + join_ptr->getTableJoin().getOnlyClause(), + input_headers, + max_block_size_) +{ + for (const auto & [left_key, right_key] : join_ptr->getTableJoin().leftToRightKeyRemap()) { size_t left_idx = input_headers[0].getPositionByName(left_key); size_t right_idx = input_headers[1].getPositionByName(right_key); left_to_right_key_remap[left_idx] = right_idx; } - const auto *smjPtr = typeid_cast(table_join.get()); + const auto *smjPtr = typeid_cast(join_ptr.get()); if (smjPtr) { null_direction_hint = smjPtr->getNullDirection(); } + if (strictness == JoinStrictness::Asof) + setAsofInequality(join_ptr->getTableJoin().getAsofInequality()); +} + +void MergeJoinAlgorithm::setAsofInequality(ASOFJoinInequality asof_inequality_) +{ + if (strictness != JoinStrictness::Asof) + throw Exception(ErrorCodes::LOGICAL_ERROR, "setAsofInequality is only supported for ASOF joins"); + + if (asof_inequality_ == ASOFJoinInequality::None) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ASOF inequality cannot be None"); + + asof_inequality = asof_inequality_; } void MergeJoinAlgorithm::logElapsed(double seconds) @@ -407,7 +586,7 @@ struct AllJoinImpl size_t lnum = nextDistinct(left_cursor.cursor); size_t rnum = nextDistinct(right_cursor.cursor); - bool all_fit_in_block = std::max(left_map.size(), right_map.size()) + lnum * rnum <= max_block_size; + bool all_fit_in_block = !max_block_size || std::max(left_map.size(), right_map.size()) + lnum * rnum <= max_block_size; bool have_all_ranges = left_cursor.cursor.isValid() && right_cursor.cursor.isValid(); if (all_fit_in_block && have_all_ranges) { @@ -421,7 +600,7 @@ struct AllJoinImpl else { assert(state == nullptr); - state = std::make_unique(left_cursor.cursor, lpos, right_cursor.cursor, rpos); + state = std::make_unique(left_cursor, lpos, right_cursor, rpos); state->addRange(0, left_cursor.getCurrent().clone(), lpos, lnum); state->addRange(1, right_cursor.getCurrent().clone(), rpos, rnum); return; @@ -466,6 +645,17 @@ void dispatchKind(JoinKind kind, Args && ... args) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported join kind: \"{}\"", kind); } +MutableColumns MergeJoinAlgorithm::getEmptyResultColumns() const +{ + MutableColumns result_cols; + for (size_t i = 0; i < 2; ++i) + { + for (const auto & col : cursors[i]->sampleColumns()) + result_cols.push_back(col->cloneEmpty()); + } + return result_cols; +} + std::optional MergeJoinAlgorithm::handleAllJoinState() { if (all_join_state && all_join_state->finished()) @@ -479,7 +669,7 @@ std::optional MergeJoinAlgorithm::handleAllJoinState /// Accumulate blocks with same key in all_join_state for (size_t i = 0; i < 2; ++i) { - if (cursors[i]->cursor.isValid() && all_join_state->keys[i].equals(cursors[i]->cursor)) + if (cursors[i]->cursor.isValid() && all_join_state->keys[i].equals(*cursors[i])) { size_t pos = cursors[i]->cursor.getRow(); size_t num = nextDistinct(cursors[i]->cursor); @@ -499,15 +689,10 @@ std::optional MergeJoinAlgorithm::handleAllJoinState stat.max_blocks_loaded = std::max(stat.max_blocks_loaded, all_join_state->blocksStored()); /// join all rows with current key - MutableColumns result_cols; - for (size_t i = 0; i < 2; ++i) - { - for (const auto & col : cursors[i]->sampleColumns()) - result_cols.push_back(col->cloneEmpty()); - } + MutableColumns result_cols = getEmptyResultColumns(); size_t total_rows = 0; - while (total_rows < max_block_size) + while (!max_block_size || total_rows < max_block_size) { const auto & left_range = all_join_state->getLeft(); const auto & right_range = all_join_state->getRight(); @@ -532,7 +717,52 @@ std::optional MergeJoinAlgorithm::handleAllJoinState return {}; } -MergeJoinAlgorithm::Status MergeJoinAlgorithm::allJoin(JoinKind kind) +std::optional MergeJoinAlgorithm::handleAsofJoinState() +{ + if (strictness != JoinStrictness::Asof) + return {}; + + if (!cursors[1]->fullyCompleted()) + return {}; + + auto & left_cursor = *cursors[0]; + const auto & left_columns = left_cursor.getCurrent().getColumns(); + + MutableColumns result_cols = getEmptyResultColumns(); + + while (left_cursor->isValid() && asof_join_state.hasMatch(left_cursor, asof_inequality)) + { + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, left_cursor->getRow()); + for (const auto & col : asof_join_state.value.getColumns()) + result_cols[i++]->insertFrom(*col, asof_join_state.value_row); + chassert(i == result_cols.size()); + left_cursor->next(); + } + + while (isLeft(kind) && left_cursor->isValid()) + { + /// return row with default values at right side + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, left_cursor->getRow()); + for (; i < result_cols.size(); ++i) + result_cols[i]->insertDefault(); + chassert(i == result_cols.size()); + + left_cursor->next(); + } + + size_t result_rows = result_cols.empty() ? 0 : result_cols.front()->size(); + if (result_rows) + return Status(Chunk(std::move(result_cols), result_rows)); + + return {}; +} + + +MergeJoinAlgorithm::Status MergeJoinAlgorithm::allJoin() { PaddedPODArray idx_map[2]; @@ -595,7 +825,7 @@ struct AnyJoinImpl FullMergeJoinCursor & right_cursor, PaddedPODArray & left_map, PaddedPODArray & right_map, - AnyJoinState & state, + AnyJoinState & any_join_state, int null_direction_hint) { assert(enabled); @@ -656,21 +886,21 @@ struct AnyJoinImpl } } - /// Remember index of last joined row to propagate it to next block + /// Remember last joined row to propagate it to next block - state.setValue({}); + any_join_state.setValue({}); if (!left_cursor->isValid()) { - state.set(0, left_cursor.cursor); + any_join_state.set(0, left_cursor); if (cmp == 0 && isLeft(kind)) - state.setValue(getRowFromChunk(right_cursor.getCurrent(), rpos)); + any_join_state.setValue(getRowFromChunk(right_cursor.getCurrent(), rpos)); } if (!right_cursor->isValid()) { - state.set(1, right_cursor.cursor); + any_join_state.set(1, right_cursor); if (cmp == 0 && isRight(kind)) - state.setValue(getRowFromChunk(left_cursor.getCurrent(), lpos)); + any_join_state.setValue(getRowFromChunk(left_cursor.getCurrent(), lpos)); } } }; @@ -680,40 +910,34 @@ std::optional MergeJoinAlgorithm::handleAnyJoinState if (any_join_state.empty()) return {}; - auto kind = table_join->getTableJoin().kind(); - Chunk result; for (size_t source_num = 0; source_num < 2; ++source_num) { auto & current = *cursors[source_num]; - auto & state = any_join_state; - if (any_join_state.keys[source_num].equals(current.cursor)) + if (any_join_state.keys[source_num].equals(current)) { size_t start_pos = current->getRow(); size_t length = nextDistinct(current.cursor); if (length && isLeft(kind) && source_num == 0) { - if (state.value) - result = copyChunkResized(current.getCurrent(), state.value, start_pos, length); + if (any_join_state.value) + result = copyChunkResized(current.getCurrent(), any_join_state.value, start_pos, length); else result = createBlockWithDefaults(source_num, start_pos, length); } if (length && isRight(kind) && source_num == 1) { - if (state.value) - result = copyChunkResized(state.value, current.getCurrent(), start_pos, length); + if (any_join_state.value) + result = copyChunkResized(any_join_state.value, current.getCurrent(), start_pos, length); else result = createBlockWithDefaults(source_num, start_pos, length); } - /// We've found row with other key, no need to skip more rows with current key if (current->isValid()) - { - state.keys[source_num].reset(); - } + any_join_state.keys[source_num].reset(); } else { @@ -726,7 +950,7 @@ std::optional MergeJoinAlgorithm::handleAnyJoinState return {}; } -MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin(JoinKind kind) +MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin() { if (auto result = handleAnyJoinState()) return std::move(*result); @@ -771,10 +995,151 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin(JoinKind kind) return Status(std::move(result)); } + +MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin() +{ + auto & left_cursor = *cursors[0]; + if (!left_cursor->isValid()) + return Status(0); + + auto & right_cursor = *cursors[1]; + if (!right_cursor->isValid()) + return Status(1); + + const auto & left_columns = left_cursor.getCurrent().getColumns(); + const auto & right_columns = right_cursor.getCurrent().getColumns(); + + MutableColumns result_cols = getEmptyResultColumns(); + + while (left_cursor->isValid() && right_cursor->isValid()) + { + auto lpos = left_cursor->getRow(); + auto rpos = right_cursor->getRow(); + auto cmp = compareCursors(*left_cursor, *right_cursor, null_direction_hint); + if (cmp == 0) + { + if (isNullAt(*left_cursor.getAsofColumn(), lpos)) + cmp = -1; + if (isNullAt(*right_cursor.getAsofColumn(), rpos)) + cmp = 1; + } + + if (cmp == 0) + { + auto asof_cmp = compareAsofCursors(left_cursor, right_cursor, null_direction_hint); + + if ((asof_inequality == ASOFJoinInequality::Less && asof_cmp <= -1) + || (asof_inequality == ASOFJoinInequality::LessOrEquals && asof_cmp <= 0)) + { + /// First row in right table that is greater (or equal) than current row in left table + /// matches asof join condition the best + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, lpos); + for (const auto & col : right_columns) + result_cols[i++]->insertFrom(*col, rpos); + chassert(i == result_cols.size()); + + left_cursor->next(); + continue; + } + + if (asof_inequality == ASOFJoinInequality::Less || asof_inequality == ASOFJoinInequality::LessOrEquals) + { + /// Asof condition is not (yet) satisfied, skip row in right table + right_cursor->next(); + continue; + } + + if ((asof_inequality == ASOFJoinInequality::Greater && asof_cmp >= 1) + || (asof_inequality == ASOFJoinInequality::GreaterOrEquals && asof_cmp >= 0)) + { + /// condition is satisfied, remember this row and move next to try to find better match + asof_join_state.set(right_cursor, rpos); + right_cursor->next(); + continue; + } + + if (asof_inequality == ASOFJoinInequality::Greater || asof_inequality == ASOFJoinInequality::GreaterOrEquals) + { + /// Asof condition is not satisfied anymore, use last matched row from right table + if (asof_join_state.hasMatch(left_cursor, asof_inequality)) + { + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, lpos); + for (const auto & col : asof_join_state.value.getColumns()) + result_cols[i++]->insertFrom(*col, asof_join_state.value_row); + chassert(i == result_cols.size()); + } + else + { + asof_join_state.reset(); + if (isLeft(kind)) + { + /// return row with default values at right side + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, lpos); + for (; i < result_cols.size(); ++i) + result_cols[i]->insertDefault(); + chassert(i == result_cols.size()); + } + } + left_cursor->next(); + continue; + } + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "TODO: implement ASOF equality join"); + } + else if (cmp < 0) + { + if (asof_join_state.hasMatch(left_cursor, asof_inequality)) + { + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, lpos); + for (const auto & col : asof_join_state.value.getColumns()) + result_cols[i++]->insertFrom(*col, asof_join_state.value_row); + chassert(i == result_cols.size()); + left_cursor->next(); + continue; + } + else + { + asof_join_state.reset(); + } + + /// no matches for rows in left table, just pass them through + size_t num = nextDistinct(*left_cursor); + + if (isLeft(kind) && num) + { + /// return them with default values at right side + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertRangeFrom(*col, lpos, num); + for (; i < result_cols.size(); ++i) + result_cols[i]->insertManyDefaults(num); + chassert(i == result_cols.size()); + } + } + else + { + /// skip rows in right table until we find match for current row in left table + nextDistinct(*right_cursor); + } + } + size_t num_rows = result_cols.empty() ? 0 : result_cols.front()->size(); + return Status(Chunk(std::move(result_cols), num_rows)); +} + + /// if `source_num == 0` get data from left cursor and fill defaults at right /// otherwise - vice versa Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const { + ColumnRawPtrs cols; { const auto & columns_left = source_num == 0 ? cursors[0]->getCurrent().getColumns() : cursors[0]->sampleColumns(); @@ -797,7 +1162,6 @@ Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t star cols.push_back(col.get()); } } - Chunk result_chunk; copyColumnsResized(cols, start, num_rows, result_chunk); return result_chunk; @@ -813,7 +1177,6 @@ Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num) IMergingAlgorithm::Status MergeJoinAlgorithm::merge() { - auto kind = table_join->getTableJoin().kind(); if (!cursors[0]->cursor.isValid() && !cursors[0]->fullyCompleted()) return Status(0); @@ -821,11 +1184,11 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge() if (!cursors[1]->cursor.isValid() && !cursors[1]->fullyCompleted()) return Status(1); - if (auto result = handleAllJoinState()) - { return std::move(*result); - } + + if (auto result = handleAsofJoinState()) + return std::move(*result); if (cursors[0]->fullyCompleted() || cursors[1]->fullyCompleted()) { @@ -839,7 +1202,7 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge() } /// check if blocks are not intersecting at all - if (int cmp = totallyCompare(cursors[0]->cursor, cursors[1]->cursor, null_direction_hint); cmp != 0) + if (int cmp = totallyCompare(cursors[0]->cursor, cursors[1]->cursor, null_direction_hint); cmp != 0 && strictness != JoinStrictness::Asof) { if (cmp < 0) { @@ -858,13 +1221,14 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge() } } - auto strictness = table_join->getTableJoin().strictness(); - if (strictness == JoinStrictness::Any) - return anyJoin(kind); + return anyJoin(); if (strictness == JoinStrictness::All) - return allJoin(kind); + return allJoin(); + + if (strictness == JoinStrictness::Asof) + return asofJoin(); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported strictness '{}'", strictness); } @@ -883,9 +1247,26 @@ MergeJoinTransform::MergeJoinTransform( /* always_read_till_end_= */ false, /* empty_chunk_on_finish_= */ true, table_join, input_headers, max_block_size) - , log(getLogger("MergeJoinTransform")) { - LOG_TRACE(log, "Use MergeJoinTransform"); +} + +MergeJoinTransform::MergeJoinTransform( + JoinKind kind_, + JoinStrictness strictness_, + const TableJoin::JoinOnClause & on_clause_, + const Blocks & input_headers, + const Block & output_header, + size_t max_block_size, + UInt64 limit_hint_) + : IMergingTransform( + input_headers, + output_header, + /* have_all_inputs_= */ true, + limit_hint_, + /* always_read_till_end_= */ false, + /* empty_chunk_on_finish_= */ true, + kind_, strictness_, on_clause_, input_headers, max_block_size) +{ } void MergeJoinTransform::onFinish() diff --git a/src/Processors/Transforms/MergeJoinTransform.h b/src/Processors/Transforms/MergeJoinTransform.h index 5ca6b076544..d37a0b9f3ae 100644 --- a/src/Processors/Transforms/MergeJoinTransform.h +++ b/src/Processors/Transforms/MergeJoinTransform.h @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -19,6 +20,7 @@ #include #include #include +#include namespace Poco { class Logger; } @@ -35,57 +37,28 @@ using FullMergeJoinCursorPtr = std::unique_ptr; /// Used instead of storing previous block struct JoinKeyRow { - std::vector row; - JoinKeyRow() = default; - explicit JoinKeyRow(const SortCursorImpl & impl_, size_t pos) - { - row.reserve(impl_.sort_columns.size()); - for (const auto & col : impl_.sort_columns) - { - auto new_col = col->cloneEmpty(); - new_col->insertFrom(*col, pos); - row.push_back(std::move(new_col)); - } - } + JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos); - void reset() - { - row.clear(); - } + bool equals(const FullMergeJoinCursor & cursor) const; + bool asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const; - bool equals(const SortCursorImpl & impl) const - { - if (row.empty()) - return false; + void reset(); - assert(this->row.size() == impl.sort_columns_size); - for (size_t i = 0; i < impl.sort_columns_size; ++i) - { - int cmp = this->row[i]->compareAt(0, impl.getRow(), *impl.sort_columns[i], impl.desc[i].nulls_direction); - if (cmp != 0) - return false; - } - return true; - } + std::vector row; }; /// Remembers previous key if it was joined in previous block class AnyJoinState : boost::noncopyable { public: - AnyJoinState() = default; + void set(size_t source_num, const FullMergeJoinCursor & cursor); + void setValue(Chunk value_); - void set(size_t source_num, const SortCursorImpl & cursor) - { - assert(cursor.rows); - keys[source_num] = JoinKeyRow(cursor, cursor.rows - 1); - } + void reset(size_t source_num); - void setValue(Chunk value_) { value = std::move(value_); } - - bool empty() const { return keys[0].row.empty() && keys[1].row.empty(); } + bool empty() const; /// current keys JoinKeyRow keys[2]; @@ -118,8 +91,8 @@ public: Chunk chunk; }; - AllJoinState(const SortCursorImpl & lcursor, size_t lpos, - const SortCursorImpl & rcursor, size_t rpos) + AllJoinState(const FullMergeJoinCursor & lcursor, size_t lpos, + const FullMergeJoinCursor & rcursor, size_t rpos) : keys{JoinKeyRow(lcursor, lpos), JoinKeyRow(rcursor, rpos)} { } @@ -187,13 +160,32 @@ private: size_t ridx = 0; }; + +class AsofJoinState : boost::noncopyable +{ +public: + void set(const FullMergeJoinCursor & rcursor, size_t rpos); + void reset(); + + bool hasMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const + { + if (value.empty()) + return false; + return key.asofMatch(cursor, asof_inequality); + } + + JoinKeyRow key; + Chunk value; + size_t value_row = 0; +}; + /* * Wrapper for SortCursorImpl */ class FullMergeJoinCursor : boost::noncopyable { public: - explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_); + explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_, bool is_asof = false); bool fullyCompleted() const; void setChunk(Chunk && chunk); @@ -203,17 +195,31 @@ public: SortCursorImpl * operator-> () { return &cursor; } const SortCursorImpl * operator-> () const { return &cursor; } + SortCursorImpl & operator* () { return cursor; } + const SortCursorImpl & operator* () const { return cursor; } + SortCursorImpl cursor; const Block & sampleBlock() const { return sample_block; } Columns sampleColumns() const { return sample_block.getColumns(); } + const IColumn * getAsofColumn() const + { + if (!asof_column_position) + return nullptr; + return cursor.all_columns[*asof_column_position]; + } + + String dump() const; + private: Block sample_block; SortDescription desc; Chunk current_chunk; bool recieved_all_blocks = false; + + std::optional asof_column_position; }; /* @@ -223,22 +229,33 @@ private: class MergeJoinAlgorithm final : public IMergingAlgorithm { public: - explicit MergeJoinAlgorithm(JoinPtr table_join, const Blocks & input_headers, size_t max_block_size_); + MergeJoinAlgorithm(JoinKind kind_, + JoinStrictness strictness_, + const TableJoin::JoinOnClause & on_clause_, + const Blocks & input_headers, + size_t max_block_size_); + + MergeJoinAlgorithm(JoinPtr join_ptr, const Blocks & input_headers, size_t max_block_size_); const char * getName() const override { return "MergeJoinAlgorithm"; } void initialize(Inputs inputs) override; void consume(Input & input, size_t source_num) override; Status merge() override; - void logElapsed(double seconds); + void setAsofInequality(ASOFJoinInequality asof_inequality_); + void logElapsed(double seconds); private: std::optional handleAnyJoinState(); - Status anyJoin(JoinKind kind); + Status anyJoin(); std::optional handleAllJoinState(); - Status allJoin(JoinKind kind); + Status allJoin(); + std::optional handleAsofJoinState(); + Status asofJoin(); + + MutableColumns getEmptyResultColumns() const; Chunk createBlockWithDefaults(size_t source_num); Chunk createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const; @@ -246,12 +263,15 @@ private: std::unordered_map left_to_right_key_remap; std::array cursors; + ASOFJoinInequality asof_inequality = ASOFJoinInequality::None; - /// Keep some state to make connection between data in different blocks + /// Keep some state to make handle data from different blocks AnyJoinState any_join_state; std::unique_ptr all_join_state; + AsofJoinState asof_join_state; - JoinPtr table_join; + JoinKind kind; + JoinStrictness strictness; size_t max_block_size; int null_direction_hint = 1; @@ -281,12 +301,21 @@ public: size_t max_block_size, UInt64 limit_hint = 0); + MergeJoinTransform( + JoinKind kind_, + JoinStrictness strictness_, + const TableJoin::JoinOnClause & on_clause_, + const Blocks & input_headers, + const Block & output_header, + size_t max_block_size, + UInt64 limit_hint_ = 0); + String getName() const override { return "MergeJoinTransform"; } + void setAsofInequality(ASOFJoinInequality asof_inequality_) { algorithm.setAsofInequality(asof_inequality_); } + protected: void onFinish() override; - - LoggerPtr log; }; } diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 86421adf4fb..2b255c5120e 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -56,7 +56,10 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int NOT_IMPLEMENTED; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; + extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; } // Interface for true window functions. It's not much of an interface, they just @@ -1710,7 +1713,7 @@ struct WindowFunctionExponentialTimeDecayedSum final : public StatefulWindowFunc { if (parameters_.size() != 1) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes exactly one parameter", name_); } return applyVisitor(FieldVisitorConvertToNumber(), parameters_[0]); @@ -1723,7 +1726,7 @@ struct WindowFunctionExponentialTimeDecayedSum final : public StatefulWindowFunc { if (argument_types.size() != 2) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes exactly two arguments", name_); } @@ -1807,7 +1810,7 @@ struct WindowFunctionExponentialTimeDecayedMax final : public WindowFunction { if (parameters_.size() != 1) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes exactly one parameter", name_); } return applyVisitor(FieldVisitorConvertToNumber(), parameters_[0]); @@ -1820,7 +1823,7 @@ struct WindowFunctionExponentialTimeDecayedMax final : public WindowFunction { if (argument_types.size() != 2) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes exactly two arguments", name_); } @@ -1882,7 +1885,7 @@ struct WindowFunctionExponentialTimeDecayedCount final : public StatefulWindowFu { if (parameters_.size() != 1) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes exactly one parameter", name_); } return applyVisitor(FieldVisitorConvertToNumber(), parameters_[0]); @@ -1895,7 +1898,7 @@ struct WindowFunctionExponentialTimeDecayedCount final : public StatefulWindowFu { if (argument_types.size() != 1) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes exactly one argument", name_); } @@ -1968,7 +1971,7 @@ struct WindowFunctionExponentialTimeDecayedAvg final : public StatefulWindowFunc { if (parameters_.size() != 1) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes exactly one parameter", name_); } return applyVisitor(FieldVisitorConvertToNumber(), parameters_[0]); @@ -1981,7 +1984,7 @@ struct WindowFunctionExponentialTimeDecayedAvg final : public StatefulWindowFunc { if (argument_types.size() != 2) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes exactly two arguments", name_); } @@ -2116,7 +2119,7 @@ struct WindowFunctionNtile final : public StatefulWindowFunction : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) { if (argument_types.size() != 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} takes exactly one argument", name_); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes exactly one argument", name_); auto type_id = argument_types[0]->getTypeId(); if (type_id != TypeIndex::UInt8 && type_id != TypeIndex::UInt16 && type_id != TypeIndex::UInt32 && type_id != TypeIndex::UInt64) @@ -2191,7 +2194,7 @@ namespace if (!buckets) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument of 'ntile' funtcion must be greater than zero"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument of 'ntile' function must be greater than zero"); } } // new partition @@ -2404,7 +2407,7 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction if (argument_types.size() > 3) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, + throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Function '{}' accepts at most 3 arguments, {} given", name, argument_types.size()); } @@ -2414,7 +2417,7 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction { if (argument_types_.empty()) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} takes at least one argument", name_); } @@ -2504,7 +2507,7 @@ struct WindowFunctionNthValue final : public WindowFunction { if (argument_types_.size() != 2) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes exactly two arguments", name_); } @@ -2578,7 +2581,7 @@ struct NonNegativeDerivativeParams if (argument_types.size() != 2 && argument_types.size() != 3) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes 2 or 3 arguments", name_); } diff --git a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp index ce5992c2548..364d7c69071 100644 --- a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp +++ b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp @@ -50,7 +50,7 @@ TEST(Processors, PortsNotConnected) processors->emplace_back(std::move(source)); processors->emplace_back(std::move(sink)); -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD try { QueryStatusPtr element; diff --git a/src/Processors/tests/gtest_full_sorting_join.cpp b/src/Processors/tests/gtest_full_sorting_join.cpp new file mode 100644 index 00000000000..f678d7984e8 --- /dev/null +++ b/src/Processors/tests/gtest_full_sorting_join.cpp @@ -0,0 +1,768 @@ +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + + +#include + +using namespace DB; + +namespace +{ + +QueryPipeline buildJoinPipeline( + std::shared_ptr left_source, + std::shared_ptr right_source, + size_t key_length = 1, + JoinKind kind = JoinKind::Inner, + JoinStrictness strictness = JoinStrictness::All, + ASOFJoinInequality asof_inequality = ASOFJoinInequality::None) +{ + Blocks inputs; + inputs.emplace_back(left_source->getPort().getHeader()); + inputs.emplace_back(right_source->getPort().getHeader()); + + Block out_header; + for (const auto & input : inputs) + { + for (ColumnWithTypeAndName column : input) + { + if (&input == &inputs.front()) + column.name = "t1." + column.name; + else + column.name = "t2." + column.name; + out_header.insert(column); + } + } + + TableJoin::JoinOnClause on_clause; + for (size_t i = 0; i < key_length; ++i) + { + on_clause.key_names_left.emplace_back(inputs[0].getByPosition(i).name); + on_clause.key_names_right.emplace_back(inputs[1].getByPosition(i).name); + } + + auto joining = std::make_shared( + kind, + strictness, + on_clause, + inputs, out_header, /* max_block_size = */ 0); + + if (asof_inequality != ASOFJoinInequality::None) + joining->setAsofInequality(asof_inequality); + + chassert(joining->getInputs().size() == 2); + + connect(left_source->getPort(), joining->getInputs().front()); + connect(right_source->getPort(), joining->getInputs().back()); + + auto * output_port = &joining->getOutputPort(); + + auto processors = std::make_shared(); + processors->emplace_back(std::move(left_source)); + processors->emplace_back(std::move(right_source)); + processors->emplace_back(std::move(joining)); + + QueryPipeline pipeline(QueryPlanResourceHolder{}, processors, output_port); + return pipeline; +} + + +std::shared_ptr oneColumnSource(const std::vector> & values) +{ + Block header = { + ColumnWithTypeAndName(std::make_shared(), "key"), + ColumnWithTypeAndName(std::make_shared(), "idx"), + }; + + UInt64 idx = 0; + Chunks chunks; + for (const auto & chunk_values : values) + { + auto key_column = ColumnUInt64::create(); + auto idx_column = ColumnUInt64::create(); + + for (auto n : chunk_values) + { + key_column->insertValue(n); + idx_column->insertValue(idx); + ++idx; + } + chunks.emplace_back(Chunk(Columns{std::move(key_column), std::move(idx_column)}, chunk_values.size())); + } + return std::make_shared(header, std::move(chunks)); +} + +class SourceChunksBuilder +{ +public: + + explicit SourceChunksBuilder(const Block & header_) + : header(header_) + { + current_chunk = header.cloneEmptyColumns(); + chassert(!current_chunk.empty()); + } + + void setBreakProbability(pcg64 & rng_) + { + /// random probability with possibility to have exact 0.0 and 1.0 values + break_prob = std::uniform_int_distribution(0, 5)(rng_) / static_cast(5); + rng = &rng_; + } + + void addRow(const std::vector & row) + { + chassert(row.size() == current_chunk.size()); + for (size_t i = 0; i < current_chunk.size(); ++i) + current_chunk[i]->insert(row[i]); + + if (rng && std::uniform_real_distribution<>(0.0, 1.0)(*rng) < break_prob) + addChunk(); + } + + void addChunk() + { + if (current_chunk.front()->empty()) + return; + + size_t rows = current_chunk.front()->size(); + chunks.emplace_back(std::move(current_chunk), rows); + current_chunk = header.cloneEmptyColumns(); + } + + std::shared_ptr getSource() + { + addChunk(); + + /// copy chunk to allow reusing same builder + Chunks chunks_copy; + chunks_copy.reserve(chunks.size()); + for (const auto & chunk : chunks) + chunks_copy.emplace_back(chunk.clone()); + return std::make_shared(header, std::move(chunks_copy)); + } + +private: + Block header; + Chunks chunks; + MutableColumns current_chunk; + + pcg64 * rng = nullptr; + double break_prob = 0.0; +}; + + +std::vector> getValuesFromBlock(const Block & block, const Names & names) +{ + std::vector> result; + for (size_t i = 0; i < block.rows(); ++i) + { + auto & row = result.emplace_back(); + for (const auto & name : names) + block.getByName(name).column->get(i, row.emplace_back()); + } + return result; +} + + +Block executePipeline(QueryPipeline && pipeline) +{ + PullingPipelineExecutor executor(pipeline); + + Blocks result_blocks; + while (true) + { + Block block; + bool is_ok = executor.pull(block); + if (!is_ok) + break; + result_blocks.emplace_back(std::move(block)); + } + + return concatenateBlocks(result_blocks); +} + +template +void assertColumnVectorEq(const typename ColumnVector::Container & expected, const Block & block, const std::string & name) +{ + const auto * actual = typeid_cast *>(block.getByName(name).column.get()); + ASSERT_TRUE(actual) << "unexpected column type: " << block.getByName(name).column->dumpStructure() << "expected: " << typeid(ColumnVector).name(); + + auto get_first_diff = [&]() -> String + { + const auto & actual_data = actual->getData(); + size_t num_rows = std::min(expected.size(), actual_data.size()); + for (size_t i = 0; i < num_rows; ++i) + { + if (expected[i] != actual_data[i]) + return fmt::format(", expected: {}, actual: {} at row {}", expected[i], actual_data[i], i); + } + return ""; + }; + + EXPECT_EQ(actual->getData().size(), expected.size()); + ASSERT_EQ(actual->getData(), expected) << "column name: " << name << get_first_diff(); +} + +template +void assertColumnEq(const IColumn & expected, const Block & block, const std::string & name) +{ + const ColumnPtr & actual = block.getByName(name).column; + ASSERT_TRUE(checkColumn(*actual)); + ASSERT_TRUE(checkColumn(expected)); + EXPECT_EQ(actual->size(), expected.size()); + + auto dump_val = [](const IColumn & col, size_t i) -> String + { + Field value; + col.get(i, value); + return value.dump(); + }; + + size_t num_rows = std::min(actual->size(), expected.size()); + for (size_t i = 0; i < num_rows; ++i) + ASSERT_EQ(actual->compareAt(i, i, expected, 1), 0) << dump_val(*actual, i) << " != " << dump_val(expected, i) << " at row " << i; +} + +template +T getRandomFrom(pcg64 & rng, const std::initializer_list & opts) +{ + std::vector options(opts.begin(), opts.end()); + size_t idx = std::uniform_int_distribution(0, options.size() - 1)(rng); + return options[idx]; +} + +void generateNextKey(pcg64 & rng, UInt64 & k1, String & k2) +{ + size_t str_len = std::uniform_int_distribution<>(1, 10)(rng); + String new_k2 = getRandomASCIIString(str_len, rng); + if (new_k2.compare(k2) <= 0) + ++k1; + k2 = new_k2; +} + +bool isStrict(ASOFJoinInequality inequality) +{ + return inequality == ASOFJoinInequality::Less || inequality == ASOFJoinInequality::Greater; +} + +} + +class FullSortingJoinTest : public ::testing::Test +{ +public: + FullSortingJoinTest() = default; + + void SetUp() override + { + Poco::AutoPtr channel(new Poco::ConsoleChannel(std::cerr)); + Poco::Logger::root().setChannel(channel); + if (const char * test_log_level = std::getenv("TEST_LOG_LEVEL")) // NOLINT(concurrency-mt-unsafe) + Poco::Logger::root().setLevel(test_log_level); + else + Poco::Logger::root().setLevel("none"); + + + UInt64 seed = randomSeed(); + if (const char * random_seed = std::getenv("TEST_RANDOM_SEED")) // NOLINT(concurrency-mt-unsafe) + seed = std::stoull(random_seed); + std::cout << "TEST_RANDOM_SEED=" << seed << std::endl; + rng = pcg64(seed); + } + + void TearDown() override + { + } + + pcg64 rng; +}; + +TEST_F(FullSortingJoinTest, AllAnyOneKey) +try +{ + { + SCOPED_TRACE("Inner All"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {1, 2, 3, 4, 5} }), + oneColumnSource({ {1}, {2}, {3}, {4}, {5} }), + 1, JoinKind::Inner, JoinStrictness::All)); + + assertColumnVectorEq(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t1.idx"); + assertColumnVectorEq(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t2.idx"); + } + { + SCOPED_TRACE("Inner Any"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {1, 2, 3, 4, 5} }), + oneColumnSource({ {1}, {2}, {3}, {4}, {5} }), + 1, JoinKind::Inner, JoinStrictness::Any)); + assertColumnVectorEq(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t1.idx"); + assertColumnVectorEq(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t2.idx"); + } + { + SCOPED_TRACE("Inner All"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }), + oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }), + 1, JoinKind::Inner, JoinStrictness::All)); + assertColumnVectorEq(ColumnUInt64::Container({0, 1, 2, 0, 1, 2, 3, 3, 4, 5}), result, "t1.idx"); + assertColumnVectorEq(ColumnUInt64::Container({3, 3, 3, 4, 4, 4, 3, 4, 5, 5}), result, "t2.idx"); + } + { + SCOPED_TRACE("Inner Any"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }), + oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }), + 1, JoinKind::Inner, JoinStrictness::Any)); + assertColumnVectorEq(ColumnUInt64::Container({0, 4}), result, "t1.idx"); + assertColumnVectorEq(ColumnUInt64::Container({3, 5}), result, "t2.idx"); + } + { + SCOPED_TRACE("Inner Any"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {2, 2, 2, 2}, {3}, {3, 5} }), + oneColumnSource({ {1, 1, 1, 2}, {2}, {3, 4} }), + 1, JoinKind::Inner, JoinStrictness::Any)); + assertColumnVectorEq(ColumnUInt64::Container({0, 4}), result, "t1.idx"); + assertColumnVectorEq(ColumnUInt64::Container({3, 5}), result, "t2.idx"); + } + { + + SCOPED_TRACE("Left Any"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }), + oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }), + 1, JoinKind::Left, JoinStrictness::Any)); + assertColumnVectorEq(ColumnUInt64::Container({0, 1, 2, 3, 4, 5, 6}), result, "t1.idx"); + assertColumnVectorEq(ColumnUInt64::Container({3, 3, 3, 3, 5, 5, 0}), result, "t2.idx"); + } + { + SCOPED_TRACE("Left Any"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {2, 2, 2, 2}, {3}, {3, 5} }), + oneColumnSource({ {1, 1, 1, 2}, {2}, {3, 4} }), + 1, JoinKind::Left, JoinStrictness::Any)); + assertColumnVectorEq(ColumnUInt64::Container({0, 1, 2, 3, 4, 5, 6}), result, "t1.idx"); + assertColumnVectorEq(ColumnUInt64::Container({3, 3, 3, 3, 5, 5, 0}), result, "t2.idx"); + } +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} + + +TEST_F(FullSortingJoinTest, AnySimple) +try +{ + JoinKind kind = getRandomFrom(rng, {JoinKind::Inner, JoinKind::Left, JoinKind::Right}); + + SourceChunksBuilder left_source({ + {std::make_shared(), "k1"}, + {std::make_shared(), "k2"}, + {std::make_shared(), "attr"}, + }); + + SourceChunksBuilder right_source({ + {std::make_shared(), "k1"}, + {std::make_shared(), "k2"}, + {std::make_shared(), "attr"}, + }); + + left_source.setBreakProbability(rng); + right_source.setBreakProbability(rng); + + size_t num_keys = std::uniform_int_distribution<>(100, 1000)(rng); + + auto expected_left = ColumnString::create(); + auto expected_right = ColumnString::create(); + + UInt64 k1 = 1; + String k2; + + auto get_attr = [&](const String & side, size_t idx) -> String + { + return toString(k1) + "_" + k2 + "_" + side + "_" + toString(idx); + }; + + for (size_t i = 0; i < num_keys; ++i) + { + generateNextKey(rng, k1, k2); + + /// Key is present in left, right or both tables. Both tables is more probable. + size_t key_presence = std::uniform_int_distribution<>(0, 10)(rng); + + size_t num_rows_left = key_presence == 0 ? 0 : std::uniform_int_distribution<>(1, 10)(rng); + for (size_t j = 0; j < num_rows_left; ++j) + left_source.addRow({k1, k2, get_attr("left", j)}); + + size_t num_rows_right = key_presence == 1 ? 0 : std::uniform_int_distribution<>(1, 10)(rng); + for (size_t j = 0; j < num_rows_right; ++j) + right_source.addRow({k1, k2, get_attr("right", j)}); + + String left_attr = num_rows_left ? get_attr("left", 0) : ""; + String right_attr = num_rows_right ? get_attr("right", 0) : ""; + + if (kind == JoinKind::Inner && num_rows_left && num_rows_right) + { + expected_left->insert(left_attr); + expected_right->insert(right_attr); + } + else if (kind == JoinKind::Left) + { + for (size_t j = 0; j < num_rows_left; ++j) + { + expected_left->insert(get_attr("left", j)); + expected_right->insert(right_attr); + } + } + else if (kind == JoinKind::Right) + { + for (size_t j = 0; j < num_rows_right; ++j) + { + expected_left->insert(left_attr); + expected_right->insert(get_attr("right", j)); + } + } + } + + Block result_block = executePipeline(buildJoinPipeline( + left_source.getSource(), right_source.getSource(), /* key_length = */ 2, + kind, JoinStrictness::Any)); + assertColumnEq(*expected_left, result_block, "t1.attr"); + assertColumnEq(*expected_right, result_block, "t2.attr"); +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} + +TEST_F(FullSortingJoinTest, AsofSimple) +try +{ + SourceChunksBuilder left_source({ + {std::make_shared(), "key"}, + {std::make_shared(), "t"}, + }); + left_source.addRow({"AMZN", 3}); + left_source.addRow({"AMZN", 4}); + left_source.addRow({"AMZN", 6}); + left_source.addRow({"SBUX", 10}); + + SourceChunksBuilder right_source({ + {std::make_shared(), "key"}, + {std::make_shared(), "t"}, + {std::make_shared(), "value"}, + }); + right_source.addRow({"AAPL", 1, 97}); + right_source.addChunk(); + right_source.addRow({"AAPL", 2, 98}); + right_source.addRow({"AAPL", 3, 99}); + right_source.addRow({"AMZN", 1, 100}); + right_source.addRow({"AMZN", 2, 110}); + right_source.addChunk(); + right_source.addRow({"AMZN", 2, 110}); + right_source.addChunk(); + right_source.addRow({"AMZN", 4, 130}); + right_source.addRow({"AMZN", 5, 140}); + right_source.addRow({"SBUX", 8, 180}); + right_source.addChunk(); + right_source.addRow({"SBUX", 9, 190}); + + { + Block result_block = executePipeline(buildJoinPipeline( + left_source.getSource(), right_source.getSource(), /* key_length = */ 2, + JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals)); + auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"}); + + ASSERT_EQ(values, (std::vector>{ + {"AMZN", 3u, 4u, 130u}, + {"AMZN", 4u, 4u, 130u}, + })); + } + + { + Block result_block = executePipeline(buildJoinPipeline( + left_source.getSource(), right_source.getSource(), /* key_length = */ 2, + JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::GreaterOrEquals)); + auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"}); + + ASSERT_EQ(values, (std::vector>{ + {"AMZN", 3u, 2u, 110u}, + {"AMZN", 4u, 4u, 130u}, + {"AMZN", 6u, 5u, 140u}, + {"SBUX", 10u, 9u, 190u}, + })); + } +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} + + +TEST_F(FullSortingJoinTest, AsofOnlyColumn) +try +{ + auto left_source = oneColumnSource({ {3}, {3, 3, 3}, {3, 5, 5, 6}, {9, 9}, {10, 20} }); + + SourceChunksBuilder right_source_builder({ + {std::make_shared(), "t"}, + {std::make_shared(), "value"}, + }); + + right_source_builder.setBreakProbability(rng); + + for (const auto & row : std::vector>{ {1, 101}, {2, 102}, {4, 104}, {5, 105}, {11, 111}, {15, 115} }) + right_source_builder.addRow(row); + + auto right_source = right_source_builder.getSource(); + + auto pipeline = buildJoinPipeline( + left_source, right_source, /* key_length = */ 1, + JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals); + + Block result_block = executePipeline(std::move(pipeline)); + + ASSERT_EQ( + assert_cast(result_block.getByName("t1.key").column.get())->getData(), + (ColumnUInt64::Container{3, 3, 3, 3, 3, 5, 5, 6, 9, 9, 10}) + ); + + ASSERT_EQ( + assert_cast(result_block.getByName("t2.t").column.get())->getData(), + (ColumnUInt64::Container{4, 4, 4, 4, 4, 5, 5, 11, 11, 11, 11}) + ); + + ASSERT_EQ( + assert_cast(result_block.getByName("t2.value").column.get())->getData(), + (ColumnUInt64::Container{104, 104, 104, 104, 104, 105, 105, 111, 111, 111, 111}) + ); +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} + +TEST_F(FullSortingJoinTest, AsofLessGeneratedTestData) +try +{ + /// Generate data random and build expected result at the same time. + + /// Test specific combinations of join kind and inequality per each run + auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left }); + auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Less, ASOFJoinInequality::LessOrEquals }); + + SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality)); + + /// Key is complex, `k1, k2` for equality and `t` for asof + SourceChunksBuilder left_source_builder({ + {std::make_shared(), "k1"}, + {std::make_shared(), "k2"}, + {std::make_shared(), "t"}, + {std::make_shared(), "attr"}, + }); + + SourceChunksBuilder right_source_builder({ + {std::make_shared(), "k1"}, + {std::make_shared(), "k2"}, + {std::make_shared(), "t"}, + {std::make_shared(), "attr"}, + }); + + /// How small generated block should be + left_source_builder.setBreakProbability(rng); + right_source_builder.setBreakProbability(rng); + + /// We are going to generate sorted data and remember expected result + ColumnInt64::Container expected; + + UInt64 k1 = 1; + String k2; + auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng); + for (size_t key_num = 0; key_num < key_num_total; ++key_num) + { + /// Generate new key greater than previous + generateNextKey(rng, k1, k2); + + Int64 left_t = 0; + /// Generate several rows for the key + size_t num_left_rows = std::uniform_int_distribution<>(1, 100)(rng); + for (size_t i = 0; i < num_left_rows; ++i) + { + /// t is strictly greater than previous + left_t += std::uniform_int_distribution<>(1, 10)(rng); + + auto left_arrtibute_value = 10 * left_t; + left_source_builder.addRow({k1, k2, left_t, left_arrtibute_value}); + expected.push_back(left_arrtibute_value); + + auto num_matches = 1 + std::poisson_distribution<>(4)(rng); + /// Generate several matches in the right table + auto right_t = left_t; + for (size_t j = 0; j < num_matches; ++j) + { + int min_step = isStrict(asof_inequality) ? 1 : 0; + right_t += std::uniform_int_distribution<>(min_step, 3)(rng); + + /// First row should match + bool is_match = j == 0; + right_source_builder.addRow({k1, k2, right_t, is_match ? 10 * left_arrtibute_value : -1}); + } + /// Next left_t should be greater than right_t not to match with previous rows + left_t = right_t; + } + + /// generate some rows with greater left_t to check that they are not matched + num_left_rows = std::bernoulli_distribution(0.5)(rng) ? std::uniform_int_distribution<>(1, 100)(rng) : 0; + for (size_t i = 0; i < num_left_rows; ++i) + { + left_t += std::uniform_int_distribution<>(1, 10)(rng); + left_source_builder.addRow({k1, k2, left_t, -10 * left_t}); + + if (join_kind == JoinKind::Left) + expected.push_back(-10 * left_t); + } + } + + Block result_block = executePipeline(buildJoinPipeline( + left_source_builder.getSource(), right_source_builder.getSource(), + /* key_length = */ 3, + join_kind, JoinStrictness::Asof, asof_inequality)); + + assertColumnVectorEq(expected, result_block, "t1.attr"); + + for (auto & e : expected) + /// Non matched rows from left table have negative attr + /// Value if attribute in right table is 10 times greater than in left table + e = e < 0 ? 0 : 10 * e; + + assertColumnVectorEq(expected, result_block, "t2.attr"); +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} + +TEST_F(FullSortingJoinTest, AsofGreaterGeneratedTestData) +try +{ + /// Generate data random and build expected result at the same time. + + /// Test specific combinations of join kind and inequality per each run + auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left }); + auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Greater, ASOFJoinInequality::GreaterOrEquals }); + + SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality)); + + SourceChunksBuilder left_source_builder({ + {std::make_shared(), "k1"}, + {std::make_shared(), "k2"}, + {std::make_shared(), "t"}, + {std::make_shared(), "attr"}, + }); + + SourceChunksBuilder right_source_builder({ + {std::make_shared(), "k1"}, + {std::make_shared(), "k2"}, + {std::make_shared(), "t"}, + {std::make_shared(), "attr"}, + }); + + left_source_builder.setBreakProbability(rng); + right_source_builder.setBreakProbability(rng); + + ColumnInt64::Container expected; + + UInt64 k1 = 1; + String k2; + UInt64 left_t = 0; + + auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng); + for (size_t key_num = 0; key_num < key_num_total; ++key_num) + { + /// Generate new key greater than previous + generateNextKey(rng, k1, k2); + + /// Generate some rows with smaller left_t to check that they are not matched + size_t num_left_rows = std::bernoulli_distribution(0.5)(rng) ? std::uniform_int_distribution<>(1, 100)(rng) : 0; + for (size_t i = 0; i < num_left_rows; ++i) + { + left_t += std::uniform_int_distribution<>(1, 10)(rng); + left_source_builder.addRow({k1, k2, left_t, -10 * left_t}); + + if (join_kind == JoinKind::Left) + expected.push_back(-10 * left_t); + } + + if (std::bernoulli_distribution(0.1)(rng)) + continue; + + size_t num_right_matches = std::uniform_int_distribution<>(1, 10)(rng); + auto right_t = left_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 0 : 1, 10)(rng); + auto attribute_value = 10 * right_t; + for (size_t j = 0; j < num_right_matches; ++j) + { + right_t += std::uniform_int_distribution<>(0, 3)(rng); + bool is_match = j == num_right_matches - 1; + right_source_builder.addRow({k1, k2, right_t, is_match ? 10 * attribute_value : -1}); + } + + /// Next left_t should be greater than (or equals) right_t to match with previous rows + left_t = right_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 1 : 0, 100)(rng); + size_t num_left_matches = std::uniform_int_distribution<>(1, 100)(rng); + for (size_t j = 0; j < num_left_matches; ++j) + { + left_t += std::uniform_int_distribution<>(0, 3)(rng); + left_source_builder.addRow({k1, k2, left_t, attribute_value}); + expected.push_back(attribute_value); + } + } + + Block result_block = executePipeline(buildJoinPipeline( + left_source_builder.getSource(), right_source_builder.getSource(), + /* key_length = */ 3, + join_kind, JoinStrictness::Asof, asof_inequality)); + + assertColumnVectorEq(expected, result_block, "t1.attr"); + + for (auto & e : expected) + /// Non matched rows from left table have negative attr + /// Value if attribute in right table is 10 times greater than in left table + e = e < 0 ? 0 : 10 * e; + + assertColumnVectorEq(expected, result_block, "t2.attr"); +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} diff --git a/src/QueryPipeline/RemoteQueryExecutor.h b/src/QueryPipeline/RemoteQueryExecutor.h index 04a59cc3b7e..78e0b02c38b 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.h +++ b/src/QueryPipeline/RemoteQueryExecutor.h @@ -8,7 +8,6 @@ #include #include #include -#include #include diff --git a/src/QueryPipeline/RemoteQueryExecutorReadContext.h b/src/QueryPipeline/RemoteQueryExecutorReadContext.h index b8aa8bb9111..fbd55278f86 100644 --- a/src/QueryPipeline/RemoteQueryExecutorReadContext.h +++ b/src/QueryPipeline/RemoteQueryExecutorReadContext.h @@ -69,7 +69,7 @@ private: /// * timer is a timerfd descriptor to manually check socket timeout /// * pipe_fd is a pipe we use to cancel query and socket polling by executor. /// We put those descriptors into our own epoll which is used by external executor. - TimerDescriptor timer{CLOCK_MONOTONIC, 0}; + TimerDescriptor timer; Poco::Timespan timeout; AsyncEventTimeoutType timeout_type; std::atomic_bool is_timer_alarmed = false; diff --git a/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp b/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp index c8ab2e3a973..34bc2eb2b5e 100644 --- a/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp +++ b/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp @@ -133,7 +133,7 @@ TEST(CheckSortedTransform, CheckBadLastRow) EXPECT_NO_THROW(executor.pull(chunk)); EXPECT_NO_THROW(executor.pull(chunk)); -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW(executor.pull(chunk), DB::Exception); #endif } @@ -158,7 +158,7 @@ TEST(CheckSortedTransform, CheckUnsortedBlock1) Chunk chunk; -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW(executor.pull(chunk), DB::Exception); #endif } @@ -181,7 +181,7 @@ TEST(CheckSortedTransform, CheckUnsortedBlock2) PullingPipelineExecutor executor(pipeline); Chunk chunk; -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW(executor.pull(chunk), DB::Exception); #endif } @@ -204,7 +204,7 @@ TEST(CheckSortedTransform, CheckUnsortedBlock3) PullingPipelineExecutor executor(pipeline); Chunk chunk; -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW(executor.pull(chunk), DB::Exception); #endif } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 07366d7cc07..c5dfe3e6e5f 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1,48 +1,48 @@ -#include -#include -#include #include #include #include #include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include +#include +#include +#include #include #include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include #include #include +#include +#include +#include +#include #include -#include #include #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include #include -#include #include #include @@ -61,6 +61,8 @@ #include +#include + using namespace std::literals; using namespace DB; @@ -666,7 +668,7 @@ void TCPHandler::runImpl() // Server should die on std logic errors in debug, like with assert() // or ErrorCodes::LOGICAL_ERROR. This helps catch these errors in // tests. -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD catch (const std::logic_error & e) { state.io.onException(); @@ -961,8 +963,8 @@ void TCPHandler::processInsertQuery() if (settings.throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert && settings.deduplicate_blocks_in_dependent_materialized_views) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, - "Deduplication is dependent materialized view cannot work together with async inserts. "\ - "Please disable eiher `deduplicate_blocks_in_dependent_materialized_views` or `async_insert` setting."); + "Deduplication in dependent materialized view cannot work together with async inserts. "\ + "Please disable either `deduplicate_blocks_in_dependent_materialized_views` or `async_insert` setting."); auto result = processAsyncInsertQuery(*insert_queue); if (result.status == AsynchronousInsertQueue::PushResult::OK) @@ -1036,6 +1038,17 @@ void TCPHandler::processOrdinaryQuery() PullingAsyncPipelineExecutor executor(pipeline); CurrentMetrics::Increment query_thread_metric_increment{CurrentMetrics::QueryThread}; + /// The following may happen: + /// * current thread is holding the lock + /// * because of the exception we unwind the stack and call the destructor of `executor` + /// * the destructor calls cancel() and waits for all query threads to finish + /// * at the same time one of the query threads is trying to acquire the lock, e.g. inside `merge_tree_read_task_callback` + /// * deadlock + SCOPE_EXIT({ + if (out_lock.owns_lock()) + out_lock.unlock(); + }); + Block block; while (executor.pull(block, interactive_delay / 1000)) { @@ -1079,8 +1092,7 @@ void TCPHandler::processOrdinaryQuery() } /// This lock wasn't acquired before and we make .lock() call here - /// so everything under this line is covered even together - /// with sendProgress() out of the scope + /// so everything under this line is covered. out_lock.lock(); /** If data has run out, we will send the profiling data and total values to @@ -1107,6 +1119,7 @@ void TCPHandler::processOrdinaryQuery() last_sent_snapshots.clear(); } + out_lock.lock(); sendProgress(); } diff --git a/src/Storages/Cache/ExternalDataSourceCache.h b/src/Storages/Cache/ExternalDataSourceCache.h index 4c8c7974005..3b4eff28307 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.h +++ b/src/Storages/Cache/ExternalDataSourceCache.h @@ -53,7 +53,7 @@ public: bool nextImpl() override; off_t seek(off_t off, int whence) override; off_t getPosition() override; - size_t getFileSize() override { return remote_file_size; } + std::optional tryGetFileSize() override { return remote_file_size; } private: std::unique_ptr local_file_holder; diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 9063437c065..089fb5c585c 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -516,7 +516,7 @@ void StorageHive::initMinMaxIndexExpression() partition_names = partition_name_types.getNames(); partition_types = partition_name_types.getTypes(); partition_minmax_idx_expr = std::make_shared( - std::make_shared(partition_name_types), ExpressionActionsSettings::fromContext(getContext())); + ActionsDAG(partition_name_types), ExpressionActionsSettings::fromContext(getContext())); } NamesAndTypesList all_name_types = metadata_snapshot->getColumns().getAllPhysical(); @@ -526,7 +526,7 @@ void StorageHive::initMinMaxIndexExpression() hivefile_name_types.push_back(column); } hivefile_minmax_idx_expr = std::make_shared( - std::make_shared(hivefile_name_types), ExpressionActionsSettings::fromContext(getContext())); + ActionsDAG(hivefile_name_types), ExpressionActionsSettings::fromContext(getContext())); } ASTPtr StorageHive::extractKeyExpressionList(const ASTPtr & node) @@ -583,7 +583,7 @@ static HiveFilePtr createHiveFile( HiveFiles StorageHive::collectHiveFilesFromPartition( const Apache::Hadoop::Hive::Partition & partition, - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, const HiveTableMetadataPtr & hive_table_metadata, const HDFSFSPtr & fs, const ContextPtr & context_, @@ -681,7 +681,7 @@ StorageHive::listDirectory(const String & path, const HiveTableMetadataPtr & hiv HiveFilePtr StorageHive::getHiveFileIfNeeded( const FileInfo & file_info, const FieldVector & fields, - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, const HiveTableMetadataPtr & hive_table_metadata, const ContextPtr & context_, PruneLevel prune_level) const @@ -828,7 +828,7 @@ void ReadFromHive::createFiles() if (hive_files) return; - hive_files = storage->collectHiveFiles(num_streams, filter_actions_dag, hive_table_metadata, fs, context); + hive_files = storage->collectHiveFiles(num_streams, filter_actions_dag ? &*filter_actions_dag : nullptr, hive_table_metadata, fs, context); LOG_INFO(log, "Collect {} hive files to read", hive_files->size()); } @@ -950,7 +950,7 @@ void ReadFromHive::initializePipeline(QueryPipelineBuilder & pipeline, const Bui HiveFiles StorageHive::collectHiveFiles( size_t max_threads, - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, const HiveTableMetadataPtr & hive_table_metadata, const HDFSFSPtr & fs, const ContextPtr & context_, @@ -1023,12 +1023,12 @@ SinkToStoragePtr StorageHive::write(const ASTPtr & /*query*/, const StorageMetad std::optional StorageHive::totalRows(const Settings & settings) const { /// query_info is not used when prune_level == PruneLevel::None - return totalRowsImpl(settings, nullptr, getContext(), PruneLevel::None); + return totalRowsImpl(settings, {}, getContext(), PruneLevel::None); } -std::optional StorageHive::totalRowsByPartitionPredicate(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) const +std::optional StorageHive::totalRowsByPartitionPredicate(const ActionsDAG & filter_actions_dag, ContextPtr context_) const { - return totalRowsImpl(context_->getSettingsRef(), filter_actions_dag, context_, PruneLevel::Partition); + return totalRowsImpl(context_->getSettingsRef(), &filter_actions_dag, context_, PruneLevel::Partition); } void StorageHive::checkAlterIsPossible(const AlterCommands & commands, ContextPtr /*local_context*/) const @@ -1043,7 +1043,7 @@ void StorageHive::checkAlterIsPossible(const AlterCommands & commands, ContextPt } std::optional -StorageHive::totalRowsImpl(const Settings & settings, const ActionsDAGPtr & filter_actions_dag, ContextPtr context_, PruneLevel prune_level) const +StorageHive::totalRowsImpl(const Settings & settings, const ActionsDAG * filter_actions_dag, ContextPtr context_, PruneLevel prune_level) const { /// Row-based format like Text doesn't support totalRowsByPartitionPredicate if (!supportsSubsetOfColumns()) diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index 8a457dd6e01..e16df22e138 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -57,7 +57,7 @@ public: bool supportsSubsetOfColumns() const; std::optional totalRows(const Settings & settings) const override; - std::optional totalRowsByPartitionPredicate(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) const override; + std::optional totalRowsByPartitionPredicate(const ActionsDAG & filter_actions_dag, ContextPtr context_) const override; void checkAlterIsPossible(const AlterCommands & commands, ContextPtr local_context) const override; protected: @@ -90,7 +90,7 @@ private: HiveFiles collectHiveFiles( size_t max_threads, - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, const HiveTableMetadataPtr & hive_table_metadata, const HDFSFSPtr & fs, const ContextPtr & context_, @@ -98,7 +98,7 @@ private: HiveFiles collectHiveFilesFromPartition( const Apache::Hadoop::Hive::Partition & partition, - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, const HiveTableMetadataPtr & hive_table_metadata, const HDFSFSPtr & fs, const ContextPtr & context_, @@ -107,7 +107,7 @@ private: HiveFilePtr getHiveFileIfNeeded( const FileInfo & file_info, const FieldVector & fields, - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, const HiveTableMetadataPtr & hive_table_metadata, const ContextPtr & context_, PruneLevel prune_level = PruneLevel::Max) const; @@ -115,7 +115,7 @@ private: void lazyInitialize(); std::optional - totalRowsImpl(const Settings & settings, const ActionsDAGPtr & filter_actions_dag, ContextPtr context_, PruneLevel prune_level) const; + totalRowsImpl(const Settings & settings, const ActionsDAG * filter_actions_dag, ContextPtr context_, PruneLevel prune_level) const; String hive_metastore_url; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 3b9c6ff28fa..755d71df531 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -237,7 +237,7 @@ StorageID IStorage::getStorageID() const return storage_id; } -ConditionSelectivityEstimator IStorage::getConditionSelectivityEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const +ConditionSelectivityEstimator IStorage::getConditionSelectivityEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAG *, ContextPtr) const { return {}; } @@ -325,9 +325,8 @@ std::string PrewhereInfo::dump() const ss << "row_level_filter " << row_level_filter->dumpDAG() << "\n"; } - if (prewhere_actions) { - ss << "prewhere_actions " << prewhere_actions->dumpDAG() << "\n"; + ss << "prewhere_actions " << prewhere_actions.dumpDAG() << "\n"; } ss << "remove_prewhere_column " << remove_prewhere_column @@ -341,10 +340,8 @@ std::string FilterDAGInfo::dump() const WriteBufferFromOwnString ss; ss << "FilterDAGInfo for column '" << column_name <<"', do_remove_column " << do_remove_column << "\n"; - if (actions) - { - ss << "actions " << actions->dumpDAG() << "\n"; - } + + ss << "actions " << actions.dumpDAG() << "\n"; return ss.str(); } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 6217470780d..0477a08b0d2 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -135,7 +135,7 @@ public: /// Returns true if the storage supports queries with the PREWHERE section. virtual bool supportsPrewhere() const { return false; } - virtual ConditionSelectivityEstimator getConditionSelectivityEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const; + virtual ConditionSelectivityEstimator getConditionSelectivityEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAG *, ContextPtr) const; /// Returns which columns supports PREWHERE, or empty std::nullopt if all columns is supported. /// This is needed for engines whose aggregates data from multiple tables, like Merge. @@ -684,7 +684,7 @@ public: virtual std::optional totalRows(const Settings &) const { return {}; } /// Same as above but also take partition predicate into account. - virtual std::optional totalRowsByPartitionPredicate(const ActionsDAGPtr &, ContextPtr) const { return {}; } + virtual std::optional totalRowsByPartitionPredicate(const ActionsDAG &, ContextPtr) const { return {}; } /// If it is possible to quickly determine exact number of bytes for the table on storage: /// - memory (approximated, resident) diff --git a/src/Storages/KVStorageUtils.cpp b/src/Storages/KVStorageUtils.cpp index 94319aef3b8..88783246e10 100644 --- a/src/Storages/KVStorageUtils.cpp +++ b/src/Storages/KVStorageUtils.cpp @@ -231,7 +231,7 @@ bool traverseDAGFilter( } std::pair getFilterKeys( - const String & primary_key, const DataTypePtr & primary_key_type, const ActionsDAGPtr & filter_actions_dag, const ContextPtr & context) + const String & primary_key, const DataTypePtr & primary_key_type, const std::optional & filter_actions_dag, const ContextPtr & context) { if (!filter_actions_dag) return {{}, true}; diff --git a/src/Storages/KVStorageUtils.h b/src/Storages/KVStorageUtils.h index e20a1ce4f37..64108290270 100644 --- a/src/Storages/KVStorageUtils.h +++ b/src/Storages/KVStorageUtils.h @@ -22,7 +22,7 @@ std::pair getFilterKeys( const std::string & primary_key, const DataTypePtr & primary_key_type, const SelectQueryInfo & query_info, const ContextPtr & context); std::pair getFilterKeys( - const String & primary_key, const DataTypePtr & primary_key_type, const ActionsDAGPtr & filter_actions_dag, const ContextPtr & context); + const String & primary_key, const DataTypePtr & primary_key_type, const std::optional & filter_actions_dag, const ContextPtr & context); template void fillColumns(const K & key, const V & value, size_t key_pos, const Block & header, MutableColumns & columns) diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 809401bb279..3aad64a0cfb 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -418,8 +418,11 @@ namespace } StorageKafka::StorageKafka( - const StorageID & table_id_, ContextPtr context_, - const ColumnsDescription & columns_, std::unique_ptr kafka_settings_, + const StorageID & table_id_, + ContextPtr context_, + const ColumnsDescription & columns_, + const String & comment, + std::unique_ptr kafka_settings_, const String & collection_name_) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) @@ -451,6 +454,7 @@ StorageKafka::StorageKafka( StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); + storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); setVirtuals(createVirtuals(kafka_settings->kafka_handle_error_mode)); @@ -1317,7 +1321,7 @@ void registerStorageKafka(StorageFactory & factory) "See https://clickhouse.com/docs/en/engines/table-engines/integrations/kafka/#configuration"); } - return std::make_shared(args.table_id, args.getContext(), args.columns, std::move(kafka_settings), collection_name); + return std::make_shared(args.table_id, args.getContext(), args.columns, args.comment, std::move(kafka_settings), collection_name); }; factory.registerStorage("Kafka", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index fa4affbda36..31e1a6076b6 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -40,6 +40,7 @@ public: const StorageID & table_id_, ContextPtr context_, const ColumnsDescription & columns_, + const String & comment, std::unique_ptr kafka_settings_, const String & collection_name_); diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index 2a697fa5654..7e43966556e 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -160,7 +160,7 @@ KeyDescription KeyDescription::buildEmptyKey() { KeyDescription result; result.expression_list_ast = std::make_shared(); - result.expression = std::make_shared(std::make_shared(), ExpressionActionsSettings{}); + result.expression = std::make_shared(ActionsDAG(), ExpressionActionsSettings{}); return result; } diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index 857cfd78910..aa8f51d5295 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -304,7 +304,7 @@ void RefreshTask::refreshTask() { PreformattedMessage message = getCurrentExceptionMessageAndPattern(true); auto text = message.text; - message.text = fmt::format("Refresh failed: {}", message.text); + message.text = fmt::format("Refresh view {} failed: {}", view->getStorageID().getFullTableName(), message.text); LOG_ERROR(log, message); exception = text; } @@ -357,7 +357,7 @@ void RefreshTask::refreshTask() stop_requested = true; tryLogCurrentException(log, "Unexpected exception in refresh scheduling, please investigate. The view will be stopped."); -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD abortOnFailedAssertion("Unexpected exception in refresh scheduling"); #endif } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index 378a1944396..df151e8478f 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -73,12 +73,7 @@ std::optional DataPartStorageOnDiskBase::getRelativePathForPrefix(Logger for (int try_no = 0; try_no < 10; ++try_no) { - if (prefix.empty()) - res = part_dir + (try_no ? "_try" + DB::toString(try_no) : ""); - else if (prefix.ends_with("_")) - res = prefix + part_dir + (try_no ? "_try" + DB::toString(try_no) : ""); - else - res = prefix + "_" + part_dir + (try_no ? "_try" + DB::toString(try_no) : ""); + res = getPartDirForPrefix(prefix, detached, try_no); if (!volume->getDisk()->exists(full_relative_path / res)) return res; @@ -101,6 +96,36 @@ std::optional DataPartStorageOnDiskBase::getRelativePathForPrefix(Logger return res; } +String DataPartStorageOnDiskBase::getPartDirForPrefix(const String & prefix, bool detached, int try_no) const +{ + /// This function joins `prefix` and the part name and an attempt number returning something like "__". + String res = prefix; + if (!prefix.empty() && !prefix.ends_with("_")) + res += "_"; + + /// During RESTORE temporary part directories are created with names like "tmp_restore_all_2_2_0-XXXXXXXX". + /// To detach such a directory we need to rename it replacing "tmp_restore_" with a specified prefix, + /// and a random suffix with an attempt number. + String part_name; + if (detached && part_dir.starts_with("tmp_restore_")) + { + part_name = part_dir.substr(strlen("tmp_restore_")); + size_t endpos = part_name.find('-'); + if (endpos != String::npos) + part_name.erase(endpos, String::npos); + } + + if (!part_name.empty()) + res += part_name; + else + res += part_dir; + + if (try_no) + res += "_try" + DB::toString(try_no); + + return res; +} + bool DataPartStorageOnDiskBase::looksLikeBrokenDetachedPartHasTheSameContent(const String & detached_part_path, std::optional & original_checksums_content, std::optional & original_files_list) const diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h index 81353d4e20b..1707efc2d4d 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h @@ -148,6 +148,9 @@ private: /// Actual file name may be the same as expected /// or be the name of the file with packed data. virtual NameSet getActualFileNamesOnDisk(const NameSet & file_names) const = 0; + + /// Returns the destination path for the part directory while copying a detached part. + String getPartDirForPrefix(const String & prefix, bool detached, int try_no) const; }; } diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 8e73021d3e7..061ee356203 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -224,14 +225,18 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write } catch (const Exception & e) { - if (e.code() != ErrorCodes::ABORTED && e.code() != ErrorCodes::CANNOT_WRITE_TO_OSTREAM) + if (e.code() != ErrorCodes::CANNOT_WRITE_TO_OSTREAM + && !isRetryableException(std::current_exception())) + { report_broken_part(); + } throw; } catch (...) { - report_broken_part(); + if (!isRetryableException(std::current_exception())) + report_broken_part(); throw; } } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index c2f87018872..3a44359b537 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -739,10 +739,25 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks } catch (...) { - /// Don't scare people with broken part error + /// Don't scare people with broken part error if it's retryable. if (!isRetryableException(std::current_exception())) + { LOG_ERROR(storage.log, "Part {} is broken and needs manual correction", getDataPartStorage().getFullPath()); + if (Exception * e = exception_cast(std::current_exception())) + { + /// Probably there is something wrong with files of this part. + /// So it can be helpful to add to the error message some information about those files. + String files_in_part; + for (auto it = getDataPartStorage().iterate(); it->isValid(); it->next()) + files_in_part += fmt::format("{}{} ({} bytes)", (files_in_part.empty() ? "" : ", "), it->name(), getDataPartStorage().getFileSize(it->name())); + if (!files_in_part.empty()) + e->addMessage("Part contains files: {}", files_in_part); + if (isEmpty()) + e->addMessage("Part is empty"); + } + } + // There could be conditions that data part to be loaded is broken, but some of meta infos are already written // into metadata before exception, need to clean them all. metadata_manager->deleteAll(/*include_projection*/ true); diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index 4ad7f6ef991..264b2b397f4 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -163,8 +163,8 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns if (dag) { dag->addMaterializingOutputActions(); - auto actions = std::make_shared< - ExpressionActions>(std::move(dag), + auto actions = std::make_shared( + std::move(*dag), ExpressionActionsSettings::fromSettings(data_part_info_for_read->getContext()->getSettingsRef())); actions->execute(additional_columns); } diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 40eef8e7431..69bffac9160 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -629,7 +629,7 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown( if (const auto * index_hint = typeid_cast(adaptor->getFunction().get())) { const auto & index_hint_dag = index_hint->getActions(); - children = index_hint_dag->getOutputs(); + children = index_hint_dag.getOutputs(); for (auto & arg : children) arg = &cloneASTWithInversionPushDown(*arg, inverted_dag, to_inverted, context, need_inversion); @@ -696,22 +696,22 @@ const std::unordered_map KeyConditi {"hilbertEncode", SpaceFillingCurveType::Hilbert} }; -ActionsDAGPtr KeyCondition::cloneASTWithInversionPushDown(ActionsDAG::NodeRawConstPtrs nodes, const ContextPtr & context) +ActionsDAG KeyCondition::cloneASTWithInversionPushDown(ActionsDAG::NodeRawConstPtrs nodes, const ContextPtr & context) { - auto res = std::make_shared(); + ActionsDAG res; std::unordered_map to_inverted; for (auto & node : nodes) - node = &DB::cloneASTWithInversionPushDown(*node, *res, to_inverted, context, false); + node = &DB::cloneASTWithInversionPushDown(*node, res, to_inverted, context, false); if (nodes.size() > 1) { auto function_builder = FunctionFactory::instance().get("and", context); - nodes = {&res->addFunction(function_builder, std::move(nodes), "")}; + nodes = {&res.addFunction(function_builder, std::move(nodes), "")}; } - res->getOutputs().swap(nodes); + res.getOutputs().swap(nodes); return res; } @@ -730,7 +730,7 @@ Block KeyCondition::getBlockWithConstants( if (syntax_analyzer_result) { auto actions = ExpressionAnalyzer(query, syntax_analyzer_result, context).getConstActionsDAG(); - for (const auto & action_node : actions->getOutputs()) + for (const auto & action_node : actions.getOutputs()) { if (action_node->column) result.insert(ColumnWithTypeAndName{action_node->column, action_node->result_type, action_node->result_name}); @@ -785,7 +785,7 @@ void KeyCondition::getAllSpaceFillingCurves() } KeyCondition::KeyCondition( - ActionsDAGPtr filter_dag, + const ActionsDAG * filter_dag, ContextPtr context, const Names & key_column_names, const ExpressionActionsPtr & key_expr_, @@ -826,9 +826,9 @@ KeyCondition::KeyCondition( * are pushed down and applied (when possible) to leaf nodes. */ auto inverted_dag = cloneASTWithInversionPushDown({filter_dag->getOutputs().at(0)}, context); - assert(inverted_dag->getOutputs().size() == 1); + assert(inverted_dag.getOutputs().size() == 1); - const auto * inverted_dag_filter_node = inverted_dag->getOutputs()[0]; + const auto * inverted_dag_filter_node = inverted_dag.getOutputs()[0]; RPNBuilder builder(inverted_dag_filter_node, context, [&](const RPNBuilderTreeNode & node, RPNElement & out) { diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index 9e2218d7a29..e9343ec08ea 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -42,7 +42,7 @@ class KeyCondition public: /// Construct key condition from ActionsDAG nodes KeyCondition( - ActionsDAGPtr filter_dag, + const ActionsDAG * filter_dag, ContextPtr context, const Names & key_column_names, const ExpressionActionsPtr & key_expr, @@ -134,7 +134,7 @@ public: DataTypePtr current_type, bool single_point = false); - static ActionsDAGPtr cloneASTWithInversionPushDown(ActionsDAG::NodeRawConstPtrs nodes, const ContextPtr & context); + static ActionsDAG cloneASTWithInversionPushDown(ActionsDAG::NodeRawConstPtrs nodes, const ContextPtr & context); bool matchesExactContinuousRange() const; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 78a551591a6..2e10f5a0227 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -474,7 +474,7 @@ StoragePolicyPtr MergeTreeData::getStoragePolicy() const } ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByPredicate( - const StorageSnapshotPtr & storage_snapshot, const ActionsDAGPtr & filter_dag, ContextPtr local_context) const + const StorageSnapshotPtr & storage_snapshot, const ActionsDAG * filter_dag, ContextPtr local_context) const { if (!local_context->getSettingsRef().allow_statistics_optimize) return {}; @@ -499,8 +499,9 @@ ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByP { auto stats = part->loadStatistics(); /// TODO: We only have one stats file for every part. + result.addRows(part->rows_count); for (const auto & stat : stats) - result.merge(part->info.getPartNameV1(), part->rows_count, stat); + result.merge(part->info.getPartNameV1(), stat); } catch (...) { @@ -515,8 +516,9 @@ ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByP if (!partition_pruner.canBePruned(*part)) { auto stats = part->loadStatistics(); + result.addRows(part->rows_count); for (const auto & stat : stats) - result.merge(part->info.getPartNameV1(), part->rows_count, stat); + result.merge(part->info.getPartNameV1(), stat); } } catch (...) @@ -748,7 +750,7 @@ ExpressionActionsPtr MergeTreeData::getMinMaxExpr(const KeyDescription & partiti if (!partition_key.column_names.empty()) partition_key_columns = partition_key.expression->getRequiredColumnsWithTypes(); - return std::make_shared(std::make_shared(partition_key_columns), settings); + return std::make_shared(ActionsDAG(partition_key_columns), settings); } Names MergeTreeData::getMinMaxColumnsNames(const KeyDescription & partition_key) @@ -1136,7 +1138,7 @@ Block MergeTreeData::getBlockWithVirtualsForFilter( std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( - const ActionsDAGPtr & filter_actions_dag, ContextPtr local_context, const DataPartsVector & parts) const + const ActionsDAG & filter_actions_dag, ContextPtr local_context, const DataPartsVector & parts) const { if (parts.empty()) return 0; @@ -1144,7 +1146,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( auto metadata_snapshot = getInMemoryMetadataPtr(); auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]}); - auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr); + auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr, /*allow_non_deterministic_functions=*/ false); if (!filter_dag) return {}; @@ -1154,7 +1156,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( if (!virtual_columns_block.has(input->result_name)) valid = false; - PartitionPruner partition_pruner(metadata_snapshot, filter_dag, local_context, true /* strict */); + PartitionPruner partition_pruner(metadata_snapshot, &*filter_dag, local_context, true /* strict */); if (partition_pruner.isUseless() && !valid) return {}; @@ -1162,7 +1164,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( if (valid) { virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, parts); - VirtualColumnUtils::filterBlockWithDAG(filter_dag, virtual_columns_block, local_context); + VirtualColumnUtils::filterBlockWithExpression(VirtualColumnUtils::buildFilterExpression(std::move(*filter_dag), local_context), virtual_columns_block); part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); if (part_values.empty()) return 0; @@ -4403,25 +4405,25 @@ bool MergeTreeData::tryRemovePartImmediately(DataPartPtr && part) size_t MergeTreeData::getTotalActiveSizeInBytes() const { - return total_active_size_bytes.load(std::memory_order_acquire); + return total_active_size_bytes.load(); } size_t MergeTreeData::getTotalActiveSizeInRows() const { - return total_active_size_rows.load(std::memory_order_acquire); + return total_active_size_rows.load(); } size_t MergeTreeData::getActivePartsCount() const { - return total_active_size_parts.load(std::memory_order_acquire); + return total_active_size_parts.load(); } size_t MergeTreeData::getOutdatedPartsCount() const { - return total_outdated_parts_count.load(std::memory_order_relaxed); + return total_outdated_parts_count.load(); } size_t MergeTreeData::getNumberOfOutdatedPartsWithExpiredRemovalTime() const @@ -5549,12 +5551,17 @@ public: attachIfAllPartsRestored(); } - String getTemporaryDirectory(const DiskPtr & disk) + String getTemporaryDirectory(const DiskPtr & disk, const String & part_name) { std::lock_guard lock{mutex}; - auto it = temp_dirs.find(disk); - if (it == temp_dirs.end()) - it = temp_dirs.emplace(disk, std::make_shared(disk, "tmp/")).first; + auto it = temp_part_dirs.find(part_name); + if (it == temp_part_dirs.end()) + { + auto temp_part_dir = std::make_shared(disk, fs::path{storage->getRelativeDataPath()} / ("tmp_restore_" + part_name + "-")); + /// Attaching parts will rename them so it's expected for a temporary part directory not to exist anymore in the end. + temp_part_dir->setShowWarningIfRemoved(false); + it = temp_part_dirs.emplace(part_name, temp_part_dir).first; + } return it->second->getRelativePath(); } @@ -5572,7 +5579,7 @@ private: storage->attachRestoredParts(std::move(parts)); parts.clear(); - temp_dirs.clear(); + temp_part_dirs.clear(); num_parts = 0; } @@ -5581,7 +5588,7 @@ private: size_t num_parts = 0; size_t num_broken_parts = 0; MutableDataPartsVector parts; - std::map> temp_dirs; + std::map> temp_part_dirs; mutable std::mutex mutex; }; @@ -5634,9 +5641,11 @@ void MergeTreeData::restorePartFromBackup(std::shared_ptr r String part_name = part_info.getPartNameAndCheckFormat(format_version); auto backup = restored_parts_holder->getBackup(); + /// Find all files of this part in the backup. + Strings filenames = backup->listFiles(part_path_in_backup, /* recursive= */ true); + /// Calculate the total size of the part. UInt64 total_size_of_part = 0; - Strings filenames = backup->listFiles(part_path_in_backup, /* recursive= */ true); fs::path part_path_in_backup_fs = part_path_in_backup; for (const String & filename : filenames) total_size_of_part += backup->getFileSize(part_path_in_backup_fs / filename); @@ -5646,11 +5655,9 @@ void MergeTreeData::restorePartFromBackup(std::shared_ptr r /// Calculate paths, for example: /// part_name = 0_1_1_0 /// part_path_in_backup = /data/test/table/0_1_1_0 - /// tmp_dir = tmp/1aaaaaa - /// tmp_part_dir = tmp/1aaaaaa/data/test/table/0_1_1_0 + /// temp_part_dir = /var/lib/clickhouse/data/test/table/tmp_restore_all_0_1_1_0-XXXXXXXX auto disk = reservation->getDisk(); - fs::path temp_dir = restored_parts_holder->getTemporaryDirectory(disk); - fs::path temp_part_dir = temp_dir / part_path_in_backup_fs.relative_path(); + fs::path temp_part_dir = restored_parts_holder->getTemporaryDirectory(disk, part_name); /// Subdirectories in the part's directory. It's used to restore projections. std::unordered_set subdirs; @@ -5677,22 +5684,25 @@ void MergeTreeData::restorePartFromBackup(std::shared_ptr r reservation->update(reservation->getSize() - file_size); } - if (auto part = loadPartRestoredFromBackup(disk, temp_part_dir.parent_path(), part_name, detach_if_broken)) + if (auto part = loadPartRestoredFromBackup(part_name, disk, temp_part_dir, detach_if_broken)) restored_parts_holder->addPart(part); else restored_parts_holder->increaseNumBrokenParts(); } -MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartRestoredFromBackup(const DiskPtr & disk, const String & temp_dir, const String & part_name, bool detach_if_broken) const +MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartRestoredFromBackup(const String & part_name, const DiskPtr & disk, const String & temp_part_dir, bool detach_if_broken) const { MutableDataPartPtr part; auto single_disk_volume = std::make_shared(disk->getName(), disk, 0); + fs::path full_part_dir{temp_part_dir}; + String parent_part_dir = full_part_dir.parent_path(); + String part_dir_name = full_part_dir.filename(); - /// Load this part from the directory `tmp_part_dir`. + /// Load this part from the directory `temp_part_dir`. auto load_part = [&] { - MergeTreeDataPartBuilder builder(*this, part_name, single_disk_volume, temp_dir, part_name); + MergeTreeDataPartBuilder builder(*this, part_name, single_disk_volume, parent_part_dir, part_dir_name); builder.withPartFormatFromDisk(); part = std::move(builder).build(); part->version.setCreationTID(Tx::PrehistoricTID, nullptr); @@ -5707,7 +5717,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartRestoredFromBackup(cons if (!part) { /// Make a fake data part only to copy its files to /detached/. - part = MergeTreeDataPartBuilder{*this, part_name, single_disk_volume, temp_dir, part_name} + part = MergeTreeDataPartBuilder{*this, part_name, single_disk_volume, parent_part_dir, part_dir_name} .withPartStorageType(MergeTreeDataPartStorageType::Full) .withPartType(MergeTreeDataPartType::Wide) .build(); @@ -6829,7 +6839,7 @@ using PartitionIdToMaxBlock = std::unordered_map; Block MergeTreeData::getMinMaxCountProjectionBlock( const StorageMetadataPtr & metadata_snapshot, const Names & required_columns, - const ActionsDAGPtr & filter_dag, + const ActionsDAG * filter_dag, const DataPartsVector & parts, const PartitionIdToMaxBlock * max_block_numbers_to_read, ContextPtr query_context) const @@ -7068,7 +7078,7 @@ ActionDAGNodes MergeTreeData::getFiltersForPrimaryKeyAnalysis(const InterpreterS ActionDAGNodes filter_nodes; if (auto additional_filter_info = select.getAdditionalQueryInfo()) - filter_nodes.nodes.push_back(&additional_filter_info->actions->findInOutputs(additional_filter_info->column_name)); + filter_nodes.nodes.push_back(&additional_filter_info->actions.findInOutputs(additional_filter_info->column_name)); if (before_where) filter_nodes.nodes.push_back(&before_where->dag.findInOutputs(where_column_name)); @@ -8174,16 +8184,16 @@ void MergeTreeData::removePartContributionToDataVolume(const DataPartPtr & part) void MergeTreeData::increaseDataVolume(ssize_t bytes, ssize_t rows, ssize_t parts) { - total_active_size_bytes.fetch_add(bytes, std::memory_order_acq_rel); - total_active_size_rows.fetch_add(rows, std::memory_order_acq_rel); - total_active_size_parts.fetch_add(parts, std::memory_order_acq_rel); + total_active_size_bytes.fetch_add(bytes); + total_active_size_rows.fetch_add(rows); + total_active_size_parts.fetch_add(parts); } void MergeTreeData::setDataVolume(size_t bytes, size_t rows, size_t parts) { - total_active_size_bytes.store(bytes, std::memory_order_release); - total_active_size_rows.store(rows, std::memory_order_release); - total_active_size_parts.store(parts, std::memory_order_release); + total_active_size_bytes.store(bytes); + total_active_size_rows.store(rows); + total_active_size_parts.store(parts); } bool MergeTreeData::insertQueryIdOrThrow(const String & query_id, size_t max_queries) const diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 7076b680521..6662df3db84 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -403,7 +403,7 @@ public: Block getMinMaxCountProjectionBlock( const StorageMetadataPtr & metadata_snapshot, const Names & required_columns, - const ActionsDAGPtr & filter_dag, + const ActionsDAG * filter_dag, const DataPartsVector & parts, const PartitionIdToMaxBlock * max_block_numbers_to_read, ContextPtr query_context) const; @@ -426,7 +426,7 @@ public: bool supportsPrewhere() const override { return true; } - ConditionSelectivityEstimator getConditionSelectivityEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const override; + ConditionSelectivityEstimator getConditionSelectivityEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAG *, ContextPtr) const override; bool supportsFinal() const override; @@ -1227,7 +1227,7 @@ protected: boost::iterator_range range, const ColumnsDescription & storage_columns); std::optional totalRowsByPartitionPredicateImpl( - const ActionsDAGPtr & filter_actions_dag, ContextPtr context, const DataPartsVector & parts) const; + const ActionsDAG & filter_actions_dag, ContextPtr context, const DataPartsVector & parts) const; static decltype(auto) getStateModifier(DataPartState state) { @@ -1473,7 +1473,7 @@ protected: /// Restores the parts of this table from backup. void restorePartsFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions); void restorePartFromBackup(std::shared_ptr restored_parts_holder, const MergeTreePartInfo & part_info, const String & part_path_in_backup, bool detach_if_broken) const; - MutableDataPartPtr loadPartRestoredFromBackup(const DiskPtr & disk, const String & temp_dir, const String & part_name, bool detach_if_broken) const; + MutableDataPartPtr loadPartRestoredFromBackup(const String & part_name, const DiskPtr & disk, const String & temp_part_dir, bool detach_if_broken) const; /// Attaches restored parts to the storage. virtual void attachRestoredParts(MutableDataPartsVector && parts) = 0; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index c5214f0f3d3..a6ef0063069 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -430,7 +430,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( ASTPtr query = sampling.filter_function; auto syntax_result = TreeRewriter(context).analyze(query, available_real_columns); - sampling.filter_expression = ExpressionAnalyzer(sampling.filter_function, syntax_result, context).getActionsDAG(false); + sampling.filter_expression = std::make_shared(ExpressionAnalyzer(sampling.filter_function, syntax_result, context).getActionsDAG(false)); } } @@ -444,7 +444,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( } void MergeTreeDataSelectExecutor::buildKeyConditionFromPartOffset( - std::optional & part_offset_condition, const ActionsDAGPtr & filter_dag, ContextPtr context) + std::optional & part_offset_condition, const ActionsDAG * filter_dag, ContextPtr context) { if (!filter_dag) return; @@ -465,10 +465,10 @@ void MergeTreeDataSelectExecutor::buildKeyConditionFromPartOffset( return; part_offset_condition.emplace(KeyCondition{ - dag, + &*dag, context, sample.getNames(), - std::make_shared(std::make_shared(sample.getColumnsWithTypeAndName()), ExpressionActionsSettings{}), + std::make_shared(ActionsDAG(sample.getColumnsWithTypeAndName()), ExpressionActionsSettings{}), {}}); } @@ -476,7 +476,7 @@ std::optional> MergeTreeDataSelectExecutor::filterPar const StorageMetadataPtr & metadata_snapshot, const MergeTreeData & data, const MergeTreeData::DataPartsVector & parts, - const ActionsDAGPtr & filter_dag, + const ActionsDAG * filter_dag, ContextPtr context) { if (!filter_dag) @@ -488,7 +488,7 @@ std::optional> MergeTreeDataSelectExecutor::filterPar return {}; auto virtual_columns_block = data.getBlockWithVirtualsForFilter(metadata_snapshot, parts); - VirtualColumnUtils::filterBlockWithDAG(dag, virtual_columns_block, context); + VirtualColumnUtils::filterBlockWithExpression(VirtualColumnUtils::buildFilterExpression(std::move(*dag), context), virtual_columns_block); return VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 788355c1e59..39bff5eacd6 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -161,7 +161,7 @@ public: /// If possible, construct optional key condition from predicates containing _part_offset column. static void buildKeyConditionFromPartOffset( - std::optional & part_offset_condition, const ActionsDAGPtr & filter_dag, ContextPtr context); + std::optional & part_offset_condition, const ActionsDAG * filter_dag, ContextPtr context); /// If possible, filter using expression on virtual columns. /// Example: SELECT count() FROM table WHERE _part = 'part_name' @@ -170,7 +170,7 @@ public: const StorageMetadataPtr & metadata_snapshot, const MergeTreeData & data, const MergeTreeData::DataPartsVector & parts, - const ActionsDAGPtr & filter_dag, + const ActionsDAG * filter_dag, ContextPtr context); /// Filter parts using minmax index and partition key. diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp index c66aa2373e7..497e86334f3 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp @@ -333,7 +333,7 @@ MergeTreeIndexConditionPtr MergeTreeIndexAnnoy::createIndexCondition(const Selec return std::make_shared(index, query, distance_function, context); }; -MergeTreeIndexConditionPtr MergeTreeIndexAnnoy::createIndexCondition(const ActionsDAGPtr &, ContextPtr) const +MergeTreeIndexConditionPtr MergeTreeIndexAnnoy::createIndexCondition(const ActionsDAG *, ContextPtr) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeTreeIndexAnnoy cannot be created with ActionsDAG"); } diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.h b/src/Storages/MergeTree/MergeTreeIndexAnnoy.h index d511ab84859..282920c608e 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.h +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.h @@ -99,7 +99,7 @@ public: MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const; - MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAGPtr &, ContextPtr) const override; + MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAG *, ContextPtr) const override; bool isVectorSearch() const override { return true; } private: diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index fc5147bb56c..c6a00751f25 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -201,7 +201,7 @@ bool maybeTrueOnBloomFilter(const IColumn * hash_column, const BloomFilterPtr & } MergeTreeIndexConditionBloomFilter::MergeTreeIndexConditionBloomFilter( - const ActionsDAGPtr & filter_actions_dag, ContextPtr context_, const Block & header_, size_t hash_functions_) + const ActionsDAG * filter_actions_dag, ContextPtr context_, const Block & header_, size_t hash_functions_) : WithContext(context_), header(header_), hash_functions(hash_functions_) { if (!filter_actions_dag) @@ -897,7 +897,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexBloomFilter::createIndexAggregator(con return std::make_shared(bits_per_row, hash_functions, index.column_names); } -MergeTreeIndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const +MergeTreeIndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const ActionsDAG * filter_actions_dag, ContextPtr context) const { return std::make_shared(filter_actions_dag, context, index.sample_block, hash_functions); } diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h index d66c4b8b6ca..bd1b137176a 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h @@ -69,7 +69,7 @@ public: std::vector> predicate; }; - MergeTreeIndexConditionBloomFilter(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_, const Block & header_, size_t hash_functions_); + MergeTreeIndexConditionBloomFilter(const ActionsDAG * filter_actions_dag, ContextPtr context_, const Block & header_, size_t hash_functions_); bool alwaysUnknownOrTrue() const override; @@ -142,7 +142,7 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; - MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const override; + MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAG * filter_actions_dag, ContextPtr context) const override; private: size_t bits_per_row; diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp index 8cf58687125..5b6813d12e3 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp @@ -138,7 +138,7 @@ void MergeTreeIndexAggregatorBloomFilterText::update(const Block & block, size_t } MergeTreeConditionBloomFilterText::MergeTreeConditionBloomFilterText( - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, ContextPtr context, const Block & index_sample_block, const BloomFilterParameters & params_, @@ -733,7 +733,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexBloomFilterText::createIndexAggregator } MergeTreeIndexConditionPtr MergeTreeIndexBloomFilterText::createIndexCondition( - const ActionsDAGPtr & filter_dag, ContextPtr context) const + const ActionsDAG * filter_dag, ContextPtr context) const { return std::make_shared(filter_dag, context, index.sample_block, params, token_extractor.get()); } diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.h b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.h index 6fd969030df..fe042884550 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.h +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.h @@ -62,7 +62,7 @@ class MergeTreeConditionBloomFilterText final : public IMergeTreeIndexCondition { public: MergeTreeConditionBloomFilterText( - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, ContextPtr context, const Block & index_sample_block, const BloomFilterParameters & params_, @@ -163,7 +163,7 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; MergeTreeIndexConditionPtr createIndexCondition( - const ActionsDAGPtr & filter_dag, ContextPtr context) const override; + const ActionsDAG * filter_dag, ContextPtr context) const override; BloomFilterParameters params; /// Function for selecting next token. diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 47ce24b91eb..cd6af68ebcc 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -186,7 +186,7 @@ void MergeTreeIndexAggregatorFullText::update(const Block & block, size_t * pos, } MergeTreeConditionFullText::MergeTreeConditionFullText( - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, ContextPtr context_, const Block & index_sample_block, const GinFilterParameters & params_, @@ -768,7 +768,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexFullText::createIndexAggregatorForPart } MergeTreeIndexConditionPtr MergeTreeIndexFullText::createIndexCondition( - const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const + const ActionsDAG * filter_actions_dag, ContextPtr context) const { return std::make_shared(filter_actions_dag, context, index.sample_block, params, token_extractor.get()); }; diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.h b/src/Storages/MergeTree/MergeTreeIndexFullText.h index 1a5e848e5ac..8e0b1a22acb 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.h +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.h @@ -63,7 +63,7 @@ class MergeTreeConditionFullText final : public IMergeTreeIndexCondition, WithCo { public: MergeTreeConditionFullText( - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, ContextPtr context, const Block & index_sample_block, const GinFilterParameters & params_, @@ -170,7 +170,7 @@ public: MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; MergeTreeIndexAggregatorPtr createIndexAggregatorForPart(const GinIndexStorePtr & store, const MergeTreeWriterSettings & /*settings*/) const override; - MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const override; + MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAG * filter_actions_dag, ContextPtr context) const override; GinFilterParameters params; /// Function for selecting next token. diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp index 0995e2724ec..cd8065ecadf 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp @@ -79,7 +79,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexHypothesis::createIndexAggregator(cons } MergeTreeIndexConditionPtr MergeTreeIndexHypothesis::createIndexCondition( - const ActionsDAGPtr &, ContextPtr) const + const ActionsDAG *, ContextPtr) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not supported"); } diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.h b/src/Storages/MergeTree/MergeTreeIndexHypothesis.h index 130e708d76f..e60335fe724 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.h +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.h @@ -69,7 +69,7 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; MergeTreeIndexConditionPtr createIndexCondition( - const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const override; + const ActionsDAG * filter_actions_dag, ContextPtr context) const override; MergeTreeIndexMergedConditionPtr createIndexMergedCondition( const SelectQueryInfo & query_info, StorageMetadataPtr storage_metadata) const override; diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp index 20dfed8cf8f..c60d63a59ba 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp @@ -157,7 +157,7 @@ void MergeTreeIndexAggregatorMinMax::update(const Block & block, size_t * pos, s namespace { -KeyCondition buildCondition(const IndexDescription & index, const ActionsDAGPtr & filter_actions_dag, ContextPtr context) +KeyCondition buildCondition(const IndexDescription & index, const ActionsDAG * filter_actions_dag, ContextPtr context) { return KeyCondition{filter_actions_dag, context, index.column_names, index.expression}; } @@ -165,7 +165,7 @@ KeyCondition buildCondition(const IndexDescription & index, const ActionsDAGPtr } MergeTreeIndexConditionMinMax::MergeTreeIndexConditionMinMax( - const IndexDescription & index, const ActionsDAGPtr & filter_actions_dag, ContextPtr context) + const IndexDescription & index, const ActionsDAG * filter_actions_dag, ContextPtr context) : index_data_types(index.data_types) , condition(buildCondition(index, filter_actions_dag, context)) { @@ -198,7 +198,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexMinMax::createIndexAggregator(const Me } MergeTreeIndexConditionPtr MergeTreeIndexMinMax::createIndexCondition( - const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const + const ActionsDAG * filter_actions_dag, ContextPtr context) const { return std::make_shared(index, filter_actions_dag, context); } diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.h b/src/Storages/MergeTree/MergeTreeIndexMinMax.h index dca26fb7b28..c5031ccbb27 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.h +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.h @@ -50,7 +50,7 @@ class MergeTreeIndexConditionMinMax final : public IMergeTreeIndexCondition public: MergeTreeIndexConditionMinMax( const IndexDescription & index, - const ActionsDAGPtr & filter_actions_dag, + const ActionsDAG * filter_actions_dag, ContextPtr context); bool alwaysUnknownOrTrue() const override; @@ -77,7 +77,7 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; MergeTreeIndexConditionPtr createIndexCondition( - const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const override; + const ActionsDAG * filter_actions_dag, ContextPtr context) const override; const char* getSerializedFileExtension() const override { return ".idx2"; } MergeTreeIndexFormat getDeserializedFormat(const IDataPartStorage & data_part_storage, const std::string & path_prefix) const override; /// NOLINT diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index b11cbf1e034..a92df4ac72d 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -44,10 +44,12 @@ MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet( const String & index_name_, const Block & index_sample_block_, size_t max_rows_, - MutableColumns && mutable_columns_) + MutableColumns && mutable_columns_, + std::vector && set_hyperrectangle_) : index_name(index_name_) , max_rows(max_rows_) , block(index_sample_block_.cloneWithColumns(std::move(mutable_columns_))) + , set_hyperrectangle(std::move(set_hyperrectangle_)) { } @@ -106,6 +108,10 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd settings.getter = [&](ISerialization::SubstreamPath) -> ReadBuffer * { return &istr; }; settings.position_independent_encoding = false; + set_hyperrectangle.clear(); + Field min_val; + Field max_val; + for (size_t i = 0; i < num_columns; ++i) { auto & elem = block.getByPosition(i); @@ -116,6 +122,13 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd serialization->deserializeBinaryBulkStatePrefix(settings, state, nullptr); serialization->deserializeBinaryBulkWithMultipleStreams(elem.column, rows_to_read, settings, state, nullptr); + + if (const auto * column_nullable = typeid_cast(elem.column.get())) + column_nullable->getExtremesNullLast(min_val, max_val); + else + elem.column->getExtremes(min_val, max_val); + + set_hyperrectangle.emplace_back(min_val, true, max_val, true); } } @@ -182,10 +195,29 @@ void MergeTreeIndexAggregatorSet::update(const Block & block, size_t * pos, size if (has_new_data) { + FieldRef field_min; + FieldRef field_max; for (size_t i = 0; i < columns.size(); ++i) { auto filtered_column = block.getByName(index_columns[i]).column->filter(filter, block.rows()); columns[i]->insertRangeFrom(*filtered_column, 0, filtered_column->size()); + + if (const auto * column_nullable = typeid_cast(filtered_column.get())) + column_nullable->getExtremesNullLast(field_min, field_max); + else + filtered_column->getExtremes(field_min, field_max); + + if (set_hyperrectangle.size() <= i) + { + set_hyperrectangle.emplace_back(field_min, true, field_max, true); + } + else + { + set_hyperrectangle[i].left + = applyVisitor(FieldVisitorAccurateLess(), set_hyperrectangle[i].left, field_min) ? set_hyperrectangle[i].left : field_min; + set_hyperrectangle[i].right + = applyVisitor(FieldVisitorAccurateLess(), set_hyperrectangle[i].right, field_max) ? field_max : set_hyperrectangle[i].right; + } } } @@ -221,7 +253,7 @@ bool MergeTreeIndexAggregatorSet::buildFilter( MergeTreeIndexGranulePtr MergeTreeIndexAggregatorSet::getGranuleAndReset() { - auto granule = std::make_shared(index_name, index_sample_block, max_rows, std::move(columns)); + auto granule = std::make_shared(index_name, index_sample_block, max_rows, std::move(columns), std::move(set_hyperrectangle)); switch (data.type) { @@ -240,17 +272,22 @@ MergeTreeIndexGranulePtr MergeTreeIndexAggregatorSet::getGranuleAndReset() return granule; } +KeyCondition buildCondition(const IndexDescription & index, const ActionsDAG * filter_actions_dag, ContextPtr context) +{ + return KeyCondition{filter_actions_dag, context, index.column_names, index.expression}; +} MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( - const String & index_name_, - const Block & index_sample_block, size_t max_rows_, - const ActionsDAGPtr & filter_dag, - ContextPtr context) - : index_name(index_name_) + const ActionsDAG * filter_dag, + ContextPtr context, + const IndexDescription & index_description) + : index_name(index_description.name) , max_rows(max_rows_) + , index_data_types(index_description.data_types) + , condition(buildCondition(index_description, filter_dag, context)) { - for (const auto & name : index_sample_block.getNames()) + for (const auto & name : index_description.sample_block.getNames()) if (!key_columns.contains(name)) key_columns.insert(name); @@ -266,15 +303,15 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( return; auto filter_actions_dag = filter_dag->clone(); - const auto * filter_actions_dag_node = filter_actions_dag->getOutputs().at(0); + const auto * filter_actions_dag_node = filter_actions_dag.getOutputs().at(0); std::unordered_map node_to_result_node; - filter_actions_dag->getOutputs()[0] = &traverseDAG(*filter_actions_dag_node, filter_actions_dag, context, node_to_result_node); + filter_actions_dag.getOutputs()[0] = &traverseDAG(*filter_actions_dag_node, filter_actions_dag, context, node_to_result_node); - filter_actions_dag->removeUnusedActions(); - actions = std::make_shared(filter_actions_dag); + filter_actions_dag.removeUnusedActions(); - actions_output_column_name = filter_actions_dag->getOutputs().at(0)->result_name; + actions_output_column_name = filter_actions_dag.getOutputs().at(0)->result_name; + actions = std::make_shared(std::move(filter_actions_dag)); } bool MergeTreeIndexConditionSet::alwaysUnknownOrTrue() const @@ -293,6 +330,9 @@ bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx if (size == 0 || (max_rows != 0 && size > max_rows)) return true; + if (!condition.checkInHyperrectangle(granule.set_hyperrectangle, index_data_types).can_be_true) + return false; + Block result = granule.block; actions->execute(result); @@ -306,7 +346,7 @@ bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx } -static const ActionsDAG::NodeRawConstPtrs & getArguments(const ActionsDAG::Node & node, const ActionsDAGPtr & result_dag_or_null, ActionsDAG::NodeRawConstPtrs * storage) +static const ActionsDAG::NodeRawConstPtrs & getArguments(const ActionsDAG::Node & node, ActionsDAG * result_dag_or_null, ActionsDAG::NodeRawConstPtrs * storage) { chassert(node.type == ActionsDAG::ActionType::FUNCTION); if (node.function_base->getName() != "indexHint") @@ -316,17 +356,17 @@ static const ActionsDAG::NodeRawConstPtrs & getArguments(const ActionsDAG::Node const auto & adaptor = typeid_cast(*node.function_base); const auto & index_hint = typeid_cast(*adaptor.getFunction()); if (!result_dag_or_null) - return index_hint.getActions()->getOutputs(); + return index_hint.getActions().getOutputs(); /// Import the DAG and map argument pointers. - ActionsDAGPtr actions_clone = index_hint.getActions()->clone(); + auto actions_clone = index_hint.getActions().clone(); chassert(storage); - result_dag_or_null->mergeNodes(std::move(*actions_clone), storage); + result_dag_or_null->mergeNodes(std::move(actions_clone), storage); return *storage; } const ActionsDAG::Node & MergeTreeIndexConditionSet::traverseDAG(const ActionsDAG::Node & node, - ActionsDAGPtr & result_dag, + ActionsDAG & result_dag, const ContextPtr & context, std::unordered_map & node_to_result_node) const { @@ -348,7 +388,7 @@ const ActionsDAG::Node & MergeTreeIndexConditionSet::traverseDAG(const ActionsDA atom_node_ptr->type == ActionsDAG::ActionType::FUNCTION) { auto bit_wrapper_function = FunctionFactory::instance().get("__bitWrapperFunc", context); - result_node = &result_dag->addFunction(bit_wrapper_function, {atom_node_ptr}, {}); + result_node = &result_dag.addFunction(bit_wrapper_function, {atom_node_ptr}, {}); } } else @@ -359,14 +399,14 @@ const ActionsDAG::Node & MergeTreeIndexConditionSet::traverseDAG(const ActionsDA unknown_field_column_with_type.type = std::make_shared(); unknown_field_column_with_type.column = unknown_field_column_with_type.type->createColumnConst(1, UNKNOWN_FIELD); - result_node = &result_dag->addColumn(unknown_field_column_with_type); + result_node = &result_dag.addColumn(unknown_field_column_with_type); } node_to_result_node.emplace(&node, result_node); return *result_node; } -const ActionsDAG::Node * MergeTreeIndexConditionSet::atomFromDAG(const ActionsDAG::Node & node, ActionsDAGPtr & result_dag, const ContextPtr & context) const +const ActionsDAG::Node * MergeTreeIndexConditionSet::atomFromDAG(const ActionsDAG::Node & node, ActionsDAG & result_dag, const ContextPtr & context) const { /// Function, literal or column @@ -386,7 +426,7 @@ const ActionsDAG::Node * MergeTreeIndexConditionSet::atomFromDAG(const ActionsDA const auto * result_node = node_to_check; if (node.type != ActionsDAG::ActionType::INPUT) - result_node = &result_dag->addInput(column_name, node.result_type); + result_node = &result_dag.addInput(column_name, node.result_type); return result_node; } @@ -407,11 +447,11 @@ const ActionsDAG::Node * MergeTreeIndexConditionSet::atomFromDAG(const ActionsDA return nullptr; } - return &result_dag->addFunction(node.function_base, children, {}); + return &result_dag.addFunction(node.function_base, children, {}); } const ActionsDAG::Node * MergeTreeIndexConditionSet::operatorFromDAG(const ActionsDAG::Node & node, - ActionsDAGPtr & result_dag, + ActionsDAG & result_dag, const ContextPtr & context, std::unordered_map & node_to_result_node) const { @@ -429,7 +469,7 @@ const ActionsDAG::Node * MergeTreeIndexConditionSet::operatorFromDAG(const Actio auto function_name = node_to_check->function->getName(); ActionsDAG::NodeRawConstPtrs temp_ptrs_to_argument; - const auto & arguments = getArguments(*node_to_check, result_dag, &temp_ptrs_to_argument); + const auto & arguments = getArguments(*node_to_check, &result_dag, &temp_ptrs_to_argument); size_t arguments_size = arguments.size(); if (function_name == "not") @@ -440,7 +480,7 @@ const ActionsDAG::Node * MergeTreeIndexConditionSet::operatorFromDAG(const Actio const ActionsDAG::Node * argument = &traverseDAG(*arguments[0], result_dag, context, node_to_result_node); auto bit_swap_last_two_function = FunctionFactory::instance().get("__bitSwapLastTwo", context); - return &result_dag->addFunction(bit_swap_last_two_function, {argument}, {}); + return &result_dag.addFunction(bit_swap_last_two_function, {argument}, {}); } else if (function_name == "and" || function_name == "indexHint" || function_name == "or") { @@ -468,7 +508,7 @@ const ActionsDAG::Node * MergeTreeIndexConditionSet::operatorFromDAG(const Actio const auto * before_last_argument = children.back(); children.pop_back(); - last_argument = &result_dag->addFunction(function, {before_last_argument, last_argument}, {}); + last_argument = &result_dag.addFunction(function, {before_last_argument, last_argument}, {}); } return last_argument; @@ -544,9 +584,9 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexSet::createIndexAggregator(const Merge } MergeTreeIndexConditionPtr MergeTreeIndexSet::createIndexCondition( - const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const + const ActionsDAG * filter_actions_dag, ContextPtr context) const { - return std::make_shared(index.name, index.sample_block, max_rows, filter_actions_dag, context); + return std::make_shared(max_rows, filter_actions_dag, context, index); } MergeTreeIndexPtr setIndexCreator(const IndexDescription & index) diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.h b/src/Storages/MergeTree/MergeTreeIndexSet.h index 6efc2effafd..ff1f45528d7 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.h +++ b/src/Storages/MergeTree/MergeTreeIndexSet.h @@ -22,7 +22,8 @@ struct MergeTreeIndexGranuleSet final : public IMergeTreeIndexGranule const String & index_name_, const Block & index_sample_block_, size_t max_rows_, - MutableColumns && columns_); + MutableColumns && columns_, + std::vector && set_hyperrectangle_); void serializeBinary(WriteBuffer & ostr) const override; void deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) override; @@ -36,6 +37,7 @@ struct MergeTreeIndexGranuleSet final : public IMergeTreeIndexGranule const size_t max_rows; Block block; + std::vector set_hyperrectangle; }; @@ -73,6 +75,7 @@ private: ClearableSetVariants data; Sizes key_sizes; MutableColumns columns; + std::vector set_hyperrectangle; }; @@ -80,11 +83,10 @@ class MergeTreeIndexConditionSet final : public IMergeTreeIndexCondition { public: MergeTreeIndexConditionSet( - const String & index_name_, - const Block & index_sample_block, size_t max_rows_, - const ActionsDAGPtr & filter_dag, - ContextPtr context); + const ActionsDAG * filter_dag, + ContextPtr context, + const IndexDescription & index_description); bool alwaysUnknownOrTrue() const override; @@ -93,16 +95,16 @@ public: ~MergeTreeIndexConditionSet() override = default; private: const ActionsDAG::Node & traverseDAG(const ActionsDAG::Node & node, - ActionsDAGPtr & result_dag, + ActionsDAG & result_dag, const ContextPtr & context, std::unordered_map & node_to_result_node) const; const ActionsDAG::Node * atomFromDAG(const ActionsDAG::Node & node, - ActionsDAGPtr & result_dag, + ActionsDAG & result_dag, const ContextPtr & context) const; const ActionsDAG::Node * operatorFromDAG(const ActionsDAG::Node & node, - ActionsDAGPtr & result_dag, + ActionsDAG & result_dag, const ContextPtr & context, std::unordered_map & node_to_result_node) const; @@ -119,6 +121,9 @@ private: std::unordered_set key_columns; ExpressionActionsPtr actions; String actions_output_column_name; + + DataTypes index_data_types; + KeyCondition condition; }; @@ -138,7 +143,7 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; MergeTreeIndexConditionPtr createIndexCondition( - const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const override; + const ActionsDAG * filter_actions_dag, ContextPtr context) const override; size_t max_rows = 0; }; diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp index c9df7210569..59a4b0fbf9c 100644 --- a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp @@ -367,7 +367,7 @@ MergeTreeIndexConditionPtr MergeTreeIndexUSearch::createIndexCondition(const Sel return std::make_shared(index, query, distance_function, context); }; -MergeTreeIndexConditionPtr MergeTreeIndexUSearch::createIndexCondition(const ActionsDAGPtr &, ContextPtr) const +MergeTreeIndexConditionPtr MergeTreeIndexUSearch::createIndexCondition(const ActionsDAG *, ContextPtr) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeTreeIndexAnnoy cannot be created with ActionsDAG"); } diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.h b/src/Storages/MergeTree/MergeTreeIndexUSearch.h index 5107cfee371..41de94402c9 100644 --- a/src/Storages/MergeTree/MergeTreeIndexUSearch.h +++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.h @@ -101,7 +101,7 @@ public: MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const; - MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAGPtr &, ContextPtr) const override; + MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAG *, ContextPtr) const override; bool isVectorSearch() const override { return true; } private: diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index a9f1fa9378f..1be73e1c811 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -167,7 +167,7 @@ struct IMergeTreeIndex } virtual MergeTreeIndexConditionPtr createIndexCondition( - const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const = 0; + const ActionsDAG * filter_actions_dag, ContextPtr context) const = 0; virtual bool isVectorSearch() const { return false; } diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index b6f626c992b..a9b77fb6c03 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -329,7 +329,7 @@ void MergeTreePrefetchedReadPool::fillPerPartStatistics() part_stat.sum_marks += range.end - range.begin; const auto & columns = settings.merge_tree_determine_task_size_by_prewhere_columns && prewhere_info - ? prewhere_info->prewhere_actions->getRequiredColumnsNames() + ? prewhere_info->prewhere_actions.getRequiredColumnsNames() : column_names; part_stat.approx_size_of_mark = getApproximateSizeOfGranule(*read_info.data_part, columns); @@ -379,7 +379,6 @@ void MergeTreePrefetchedReadPool::fillPerThreadTasks(size_t threads, size_t sum_ for (const auto & part : per_part_statistics) total_size_approx += part.sum_marks * part.approx_size_of_mark; - size_t min_prefetch_step_marks = pool_settings.min_marks_for_concurrent_read; for (size_t i = 0; i < per_part_infos.size(); ++i) { auto & part_stat = per_part_statistics[i]; @@ -394,29 +393,7 @@ void MergeTreePrefetchedReadPool::fillPerThreadTasks(size_t threads, size_t sum_ 1, static_cast(std::round(static_cast(settings.filesystem_prefetch_step_bytes) / part_stat.approx_size_of_mark))); } - /// This limit is important to avoid spikes of slow aws getObject requests when parallelizing within one file. - /// (The default is taken from here https://docs.aws.amazon.com/whitepapers/latest/s3-optimizing-performance-best-practices/use-byte-range-fetches.html). - if (part_stat.approx_size_of_mark - && settings.filesystem_prefetch_min_bytes_for_single_read_task - && part_stat.approx_size_of_mark < settings.filesystem_prefetch_min_bytes_for_single_read_task) - { - const size_t min_prefetch_step_marks_by_total_cols = static_cast( - std::ceil(static_cast(settings.filesystem_prefetch_min_bytes_for_single_read_task) / part_stat.approx_size_of_mark)); - - /// At least one task to start working on it right now and another one to prefetch in the meantime. - const size_t new_min_prefetch_step_marks = std::min(min_prefetch_step_marks_by_total_cols, sum_marks / threads / 2); - if (min_prefetch_step_marks < new_min_prefetch_step_marks) - { - LOG_DEBUG(log, "Increasing min prefetch step from {} to {}", min_prefetch_step_marks, new_min_prefetch_step_marks); - min_prefetch_step_marks = new_min_prefetch_step_marks; - } - } - - if (part_stat.prefetch_step_marks < min_prefetch_step_marks) - { - LOG_DEBUG(log, "Increasing prefetch step from {} to {}", part_stat.prefetch_step_marks, min_prefetch_step_marks); - part_stat.prefetch_step_marks = min_prefetch_step_marks; - } + part_stat.prefetch_step_marks = std::max(part_stat.prefetch_step_marks, per_part_infos[i]->min_marks_per_task); LOG_DEBUG( log, @@ -433,11 +410,10 @@ void MergeTreePrefetchedReadPool::fillPerThreadTasks(size_t threads, size_t sum_ LOG_DEBUG( log, - "Sum marks: {}, threads: {}, min_marks_per_thread: {}, min prefetch step marks: {}, prefetches limit: {}, total_size_approx: {}", + "Sum marks: {}, threads: {}, min_marks_per_thread: {}, prefetches limit: {}, total_size_approx: {}", sum_marks, threads, min_marks_per_thread, - min_prefetch_step_marks, settings.filesystem_prefetches_limit, total_size_approx); diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 1daf46b6e25..cc321cd5a4d 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -26,14 +26,6 @@ extern const int CANNOT_SCHEDULE_TASK; extern const int LOGICAL_ERROR; } -size_t getApproxSizeOfPart(const IMergeTreeDataPart & part, const Names & columns_to_read) -{ - ColumnSize columns_size{}; - for (const auto & col_name : columns_to_read) - columns_size.add(part.getColumnSize(col_name)); - return columns_size.data_compressed; -} - MergeTreeReadPool::MergeTreeReadPool( RangesInDataParts && parts_, VirtualFields shared_virtual_fields_, @@ -54,37 +46,9 @@ MergeTreeReadPool::MergeTreeReadPool( column_names_, settings_, context_) - , min_marks_for_concurrent_read(pool_settings.min_marks_for_concurrent_read) , backoff_settings{context_->getSettingsRef()} , backoff_state{pool_settings.threads} { - if (std::ranges::count(is_part_on_remote_disk, true)) - { - const auto & settings = context_->getSettingsRef(); - - size_t total_compressed_bytes = 0; - size_t total_marks = 0; - for (const auto & part : parts_ranges) - { - const auto & columns = settings.merge_tree_determine_task_size_by_prewhere_columns && prewhere_info - ? prewhere_info->prewhere_actions->getRequiredColumnsNames() - : column_names_; - - total_compressed_bytes += getApproxSizeOfPart(*part.data_part, columns); - total_marks += part.getMarksCount(); - } - - if (total_marks) - { - const auto min_bytes_per_task = settings.merge_tree_min_bytes_per_task_for_remote_reading; - const auto avg_mark_bytes = std::max(total_compressed_bytes / total_marks, 1); - /// We're taking min here because number of tasks shouldn't be too low - it will make task stealing impossible. - const auto heuristic_min_marks = std::min(total_marks / pool_settings.threads, min_bytes_per_task / avg_mark_bytes); - - min_marks_for_concurrent_read = std::max(heuristic_min_marks, min_marks_for_concurrent_read); - } - } - fillPerThreadInfo(pool_settings.threads, pool_settings.sum_marks); } @@ -129,15 +93,16 @@ MergeTreeReadTaskPtr MergeTreeReadPool::getTask(size_t task_idx, MergeTreeReadTa const auto part_idx = thread_task.part_idx; auto & marks_in_part = thread_tasks.sum_marks_in_parts.back(); + const auto min_marks_per_task = per_part_infos[part_idx]->min_marks_per_task; size_t need_marks; if (is_part_on_remote_disk[part_idx] && !pool_settings.use_const_size_tasks_for_remote_reading) need_marks = marks_in_part; else /// Get whole part to read if it is small enough. - need_marks = std::min(marks_in_part, min_marks_for_concurrent_read); + need_marks = std::min(marks_in_part, min_marks_per_task); /// Do not leave too little rows in part for next time. - if (marks_in_part > need_marks && marks_in_part - need_marks < min_marks_for_concurrent_read / 2) + if (marks_in_part > need_marks && marks_in_part - need_marks < min_marks_per_task / 2) need_marks = marks_in_part; MarkRanges ranges_to_get_from_part; @@ -256,8 +221,6 @@ void MergeTreeReadPool::fillPerThreadInfo(size_t threads, size_t sum_marks) parts_queue.push(std::move(info.second)); } - LOG_DEBUG(log, "min_marks_for_concurrent_read={}", min_marks_for_concurrent_read); - const size_t min_marks_per_thread = (sum_marks - 1) / threads + 1; for (size_t i = 0; i < threads && !parts_queue.empty(); ++i) @@ -270,15 +233,14 @@ void MergeTreeReadPool::fillPerThreadInfo(size_t threads, size_t sum_marks) auto & part_with_ranges = current_parts.back().part; size_t & marks_in_part = current_parts.back().sum_marks; const auto part_idx = current_parts.back().part_idx; + const auto min_marks_per_task = per_part_infos[part_idx]->min_marks_per_task; /// Do not get too few rows from part. - if (marks_in_part >= min_marks_for_concurrent_read && - need_marks < min_marks_for_concurrent_read) - need_marks = min_marks_for_concurrent_read; + if (marks_in_part >= min_marks_per_task && need_marks < min_marks_per_task) + need_marks = min_marks_per_task; /// Do not leave too few rows in part for next time. - if (marks_in_part > need_marks && - marks_in_part - need_marks < min_marks_for_concurrent_read) + if (marks_in_part > need_marks && marks_in_part - need_marks < min_marks_per_task) need_marks = marks_in_part; MarkRanges ranges_to_get_from_part; diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index cd12b9db9fd..7f0de21e1a4 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -73,7 +73,6 @@ private: void fillPerThreadInfo(size_t threads, size_t sum_marks); mutable std::mutex mutex; - size_t min_marks_for_concurrent_read = 0; /// State to track numbers of slow reads. struct BackoffState diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index 0ea19370d45..6d2560bc9c7 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -1,6 +1,10 @@ #include -#include + +#include #include +#include + +#include namespace DB @@ -34,10 +38,58 @@ MergeTreeReadPoolBase::MergeTreeReadPoolBase( , header(storage_snapshot->getSampleBlockForColumns(column_names)) , profile_callback([this](ReadBufferFromFileBase::ProfileInfo info_) { profileFeedback(info_); }) { - fillPerPartInfos(); + fillPerPartInfos(context_->getSettingsRef()); } -void MergeTreeReadPoolBase::fillPerPartInfos() +static size_t getApproxSizeOfPart(const IMergeTreeDataPart & part, const Names & columns_to_read) +{ + ColumnSize columns_size{}; + for (const auto & col_name : columns_to_read) + columns_size.add(part.getColumnSize(col_name)); + /// For compact parts we don't know individual column sizes, let's use whole part size as approximation + return columns_size.data_compressed ? columns_size.data_compressed : part.getBytesOnDisk(); +} + +static size_t calculateMinMarksPerTask( + const RangesInDataPart & part, + const Names & columns_to_read, + PrewhereInfoPtr prewhere_info, + const MergeTreeReadPoolBase::PoolSettings & pool_settings, + const Settings & settings) +{ + size_t min_marks_per_task = pool_settings.min_marks_for_concurrent_read; + const size_t part_marks_count = part.getMarksCount(); + if (part_marks_count && part.data_part->isStoredOnRemoteDisk()) + { + /// We assume that most of the time prewhere does it's job good meaning that lion's share of the rows is filtered out. + /// Which means in turn that for most of the rows we will read only the columns from prewhere clause. + /// So it makes sense to use only them for the estimation. + const auto & columns = settings.merge_tree_determine_task_size_by_prewhere_columns && prewhere_info + ? prewhere_info->prewhere_actions.getRequiredColumnsNames() + : columns_to_read; + const size_t part_compressed_bytes = getApproxSizeOfPart(*part.data_part, columns); + + const auto avg_mark_bytes = std::max(part_compressed_bytes / part_marks_count, 1); + const auto min_bytes_per_task = settings.merge_tree_min_bytes_per_task_for_remote_reading; + /// We're taking min here because number of tasks shouldn't be too low - it will make task stealing impossible. + /// We also create at least two tasks per thread to have something to steal from a slow thread. + const auto heuristic_min_marks + = std::min(pool_settings.sum_marks / pool_settings.threads / 2, min_bytes_per_task / avg_mark_bytes); + if (heuristic_min_marks > min_marks_per_task) + { + LOG_TEST( + &Poco::Logger::get("MergeTreeReadPoolBase"), + "Increasing min_marks_per_task from {} to {} based on columns size heuristic", + min_marks_per_task, + heuristic_min_marks); + min_marks_per_task = heuristic_min_marks; + } + } + LOG_TEST(&Poco::Logger::get("MergeTreeReadPoolBase"), "Will use min_marks_per_task={}", min_marks_per_task); + return min_marks_per_task; +} + +void MergeTreeReadPoolBase::fillPerPartInfos(const Settings & settings) { per_part_infos.reserve(parts_ranges.size()); is_part_on_remote_disk.reserve(parts_ranges.size()); @@ -101,6 +153,8 @@ void MergeTreeReadPoolBase::fillPerPartInfos() } is_part_on_remote_disk.push_back(part_with_ranges.data_part->isStoredOnRemoteDisk()); + read_task_info.min_marks_per_task + = calculateMinMarksPerTask(part_with_ranges, column_names, prewhere_info, pool_settings, settings); per_part_infos.push_back(std::make_shared(std::move(read_task_info))); } } diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.h b/src/Storages/MergeTree/MergeTreeReadPoolBase.h index 1b5bfec5898..123f7538ba8 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.h @@ -48,7 +48,7 @@ protected: const UncompressedCachePtr owned_uncompressed_cache; const Block header; - void fillPerPartInfos(); + void fillPerPartInfos(const Settings & settings); std::vector getPerPartSumMarks() const; MergeTreeReadTaskPtr createTask( diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp index 38035d97f56..33eaf5a49bd 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp @@ -33,7 +33,11 @@ MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( context_) , extension(std::move(extension_)) , coordination_mode(CoordinationMode::Default) + , min_marks_per_task(pool_settings.min_marks_for_concurrent_read) { + for (const auto & info : per_part_infos) + min_marks_per_task = std::max(min_marks_per_task, info->min_marks_per_task); + extension.all_callback( InitialAllRangesAnnouncement(coordination_mode, parts_ranges.getDescriptions(), extension.number_of_current_replica)); } @@ -50,7 +54,7 @@ MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicas::getTask(size_t /*task_id auto result = extension.callback(ParallelReadRequest( coordination_mode, extension.number_of_current_replica, - pool_settings.min_marks_for_concurrent_read * pool_settings.threads, + min_marks_per_task * pool_settings.threads, /// For Default coordination mode we don't need to pass part names. RangesInDataPartsDescription{})); @@ -76,9 +80,9 @@ MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicas::getTask(size_t /*task_id MarkRanges ranges_to_read; size_t current_sum_marks = 0; - while (current_sum_marks < pool_settings.min_marks_for_concurrent_read && !current_task.ranges.empty()) + while (current_sum_marks < min_marks_per_task && !current_task.ranges.empty()) { - auto diff = pool_settings.min_marks_for_concurrent_read - current_sum_marks; + auto diff = min_marks_per_task - current_sum_marks; auto range = current_task.ranges.front(); if (range.getNumberOfMarks() > diff) { diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h index ca159edb91c..6ba63cc2c9a 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h @@ -32,6 +32,7 @@ private: const ParallelReadingExtension extension; const CoordinationMode coordination_mode; + size_t min_marks_per_task{0}; RangesInDataPartsDescription buffered_ranges; bool no_more_tasks_available{false}; LoggerPtr log = getLogger("MergeTreeReadPoolParallelReplicas"); diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp index 01c0a9f91be..6b5cf978423 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp @@ -32,7 +32,11 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd context_) , extension(std::move(extension_)) , mode(mode_) + , min_marks_per_task(pool_settings.min_marks_for_concurrent_read) { + for (const auto & info : per_part_infos) + min_marks_per_task = std::max(min_marks_per_task, info->min_marks_per_task); + for (const auto & part : parts_ranges) request.push_back({part.data_part->info, MarkRanges{}}); @@ -76,12 +80,8 @@ MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicasInOrder::getTask(size_t ta if (no_more_tasks) return nullptr; - auto response = extension.callback(ParallelReadRequest( - mode, - extension.number_of_current_replica, - pool_settings.min_marks_for_concurrent_read * request.size(), - request - )); + auto response + = extension.callback(ParallelReadRequest(mode, extension.number_of_current_replica, min_marks_per_task * request.size(), request)); if (!response || response->description.empty() || response->finish) { diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h index 4fe3f7a699c..22841bea212 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h @@ -30,6 +30,7 @@ private: const ParallelReadingExtension extension; const CoordinationMode mode; + size_t min_marks_per_task{0}; bool no_more_tasks{false}; RangesInDataPartsDescription request; RangesInDataPartsDescription buffered_tasks; diff --git a/src/Storages/MergeTree/MergeTreeReadTask.h b/src/Storages/MergeTree/MergeTreeReadTask.h index 794e70a0fbb..e90a07e0b55 100644 --- a/src/Storages/MergeTree/MergeTreeReadTask.h +++ b/src/Storages/MergeTree/MergeTreeReadTask.h @@ -68,6 +68,8 @@ struct MergeTreeReadTaskInfo MergeTreeBlockSizePredictorPtr shared_size_predictor; /// TODO: comment VirtualFields const_virtual_fields; + /// The amount of data to read per task based on size of the queried columns. + size_t min_marks_per_task = 0; }; using MergeTreeReadTaskInfoPtr = std::shared_ptr; diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index a6298aab3d9..1a0709faf1c 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -59,7 +59,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( if (prewhere_info) LOG_TEST(log, "Original PREWHERE DAG:\n{}\nPREWHERE actions:\n{}", - (prewhere_info->prewhere_actions ? prewhere_info->prewhere_actions->dumpDAG(): std::string("")), + prewhere_info->prewhere_actions.dumpDAG(), (!prewhere_actions.steps.empty() ? prewhere_actions.dump() : std::string(""))); } @@ -80,7 +80,7 @@ PrewhereExprInfo MergeTreeSelectProcessor::getPrewhereActions(PrewhereInfoPtr pr PrewhereExprStep row_level_filter_step { .type = PrewhereExprStep::Filter, - .actions = std::make_shared(prewhere_info->row_level_filter, actions_settings), + .actions = std::make_shared(prewhere_info->row_level_filter->clone(), actions_settings), .filter_column_name = prewhere_info->row_level_column_name, .remove_filter_column = true, .need_filter = true, @@ -96,7 +96,7 @@ PrewhereExprInfo MergeTreeSelectProcessor::getPrewhereActions(PrewhereInfoPtr pr PrewhereExprStep prewhere_step { .type = PrewhereExprStep::Filter, - .actions = std::make_shared(prewhere_info->prewhere_actions, actions_settings), + .actions = std::make_shared(prewhere_info->prewhere_actions.clone(), actions_settings), .filter_column_name = prewhere_info->prewhere_column_name, .remove_filter_column = prewhere_info->remove_prewhere_column, .need_filter = prewhere_info->need_filter, diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 07476e8b2e9..39aa191a3d2 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -15,16 +15,11 @@ #include #include #include +#include namespace DB { -namespace ErrorCodes -{ - extern const int MEMORY_LIMIT_EXCEEDED; -} - - /// Lightweight (in terms of logic) stream for reading single part from /// MergeTree, used for merges and mutations. /// @@ -281,7 +276,7 @@ try catch (...) { /// Suspicion of the broken part. A part is added to the queue for verification. - if (getCurrentExceptionCode() != ErrorCodes::MEMORY_LIMIT_EXCEEDED) + if (!isRetryableException(std::current_exception())) storage.reportBrokenPart(data_part); throw; } @@ -353,7 +348,7 @@ public: MergeTreeData::DataPartPtr data_part_, Names columns_to_read_, bool apply_deleted_mask_, - ActionsDAGPtr filter_, + std::optional filter_, ContextPtr context_, LoggerPtr log_) : ISourceStep(DataStream{.header = storage_snapshot_->getSampleBlockForColumns(columns_to_read_)}) @@ -380,7 +375,7 @@ public: { const auto & primary_key = storage_snapshot->metadata->getPrimaryKey(); const Names & primary_key_column_names = primary_key.column_names; - KeyCondition key_condition(filter, context, primary_key_column_names, primary_key.expression); + KeyCondition key_condition(&*filter, context, primary_key_column_names, primary_key.expression); LOG_DEBUG(log, "Key condition: {}", key_condition.toString()); if (!key_condition.alwaysFalse()) @@ -421,7 +416,7 @@ private: MergeTreeData::DataPartPtr data_part; Names columns_to_read; bool apply_deleted_mask; - ActionsDAGPtr filter; + std::optional filter; ContextPtr context; LoggerPtr log; }; @@ -434,14 +429,14 @@ void createReadFromPartStep( MergeTreeData::DataPartPtr data_part, Names columns_to_read, bool apply_deleted_mask, - ActionsDAGPtr filter, + std::optional filter, ContextPtr context, LoggerPtr log) { auto reading = std::make_unique(type, storage, storage_snapshot, std::move(data_part), std::move(columns_to_read), apply_deleted_mask, - filter, std::move(context), log); + std::move(filter), std::move(context), log); plan.addStep(std::move(reading)); } diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.h b/src/Storages/MergeTree/MergeTreeSequentialSource.h index e6f055f776c..1b05512b9a3 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.h +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.h @@ -38,7 +38,7 @@ void createReadFromPartStep( MergeTreeData::DataPartPtr data_part, Names columns_to_read, bool apply_deleted_mask, - ActionsDAGPtr filter, + std::optional filter, ContextPtr context, LoggerPtr log); diff --git a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp index 43e3b0c505a..36ff6c0a4bd 100644 --- a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp +++ b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp @@ -10,6 +10,9 @@ namespace DB { +class ActionsDAG; +using ActionsDAGPtr = std::unique_ptr; + namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -50,7 +53,7 @@ void fillRequiredColumns(const ActionsDAG::Node * node, std::unordered_map; const ActionsDAG::Node & addClonedDAGToDAG( size_t step, const ActionsDAG::Node * original_dag_node, - ActionsDAGPtr new_dag, + const ActionsDAGPtr & new_dag, OriginalToNewNodeMap & node_remap, NodeNameToLastUsedStepMap & node_to_step_map) { @@ -72,7 +75,7 @@ const ActionsDAG::Node & addClonedDAGToDAG( { /// If the node is already in the new DAG, return it const auto & node_ref = node_remap.at(node_name); - if (node_ref.dag == new_dag) + if (node_ref.dag == new_dag.get()) return *node_ref.node; /// If the node is known from the previous steps, add it as an input, except for constants @@ -80,7 +83,7 @@ const ActionsDAG::Node & addClonedDAGToDAG( { node_ref.dag->addOrReplaceInOutputs(*node_ref.node); const auto & new_node = new_dag->addInput(node_ref.node->result_name, node_ref.node->result_type); - node_remap[node_name] = {new_dag, &new_node}; /// TODO: here we update the node reference. Is it always correct? + node_remap[node_name] = {new_dag.get(), &new_node}; /// TODO: here we update the node reference. Is it always correct? /// Remember the index of the last step which reuses this node. /// We cannot remove this node from the outputs before that step. @@ -93,7 +96,7 @@ const ActionsDAG::Node & addClonedDAGToDAG( if (original_dag_node->type == ActionsDAG::ActionType::INPUT) { const auto & new_node = new_dag->addInput(original_dag_node->result_name, original_dag_node->result_type); - node_remap[node_name] = {new_dag, &new_node}; + node_remap[node_name] = {new_dag.get(), &new_node}; return new_node; } @@ -102,7 +105,7 @@ const ActionsDAG::Node & addClonedDAGToDAG( { const auto & new_node = new_dag->addColumn( ColumnWithTypeAndName(original_dag_node->column, original_dag_node->result_type, original_dag_node->result_name)); - node_remap[node_name] = {new_dag, &new_node}; + node_remap[node_name] = {new_dag.get(), &new_node}; return new_node; } @@ -110,7 +113,7 @@ const ActionsDAG::Node & addClonedDAGToDAG( { const auto & alias_child = addClonedDAGToDAG(step, original_dag_node->children[0], new_dag, node_remap, node_to_step_map); const auto & new_node = new_dag->addAlias(alias_child, original_dag_node->result_name); - node_remap[node_name] = {new_dag, &new_node}; + node_remap[node_name] = {new_dag.get(), &new_node}; return new_node; } @@ -125,7 +128,7 @@ const ActionsDAG::Node & addClonedDAGToDAG( } const auto & new_node = new_dag->addFunction(original_dag_node->function_base, new_children, original_dag_node->result_name); - node_remap[node_name] = {new_dag, &new_node}; + node_remap[node_name] = {new_dag.get(), &new_node}; return new_node; } @@ -133,13 +136,13 @@ const ActionsDAG::Node & addClonedDAGToDAG( } const ActionsDAG::Node & addFunction( - ActionsDAGPtr new_dag, + const ActionsDAGPtr & new_dag, const FunctionOverloadResolverPtr & function, ActionsDAG::NodeRawConstPtrs children, OriginalToNewNodeMap & node_remap) { const auto & new_node = new_dag->addFunction(function, children, ""); - node_remap[new_node.result_name] = {new_dag, &new_node}; + node_remap[new_node.result_name] = {new_dag.get(), &new_node}; return new_node; } @@ -147,7 +150,7 @@ const ActionsDAG::Node & addFunction( /// This is different from ActionsDAG::addCast() because it set the name equal to the original name effectively hiding the value before cast, /// but it might be required for further steps with its original uncasted type. const ActionsDAG::Node & addCast( - ActionsDAGPtr dag, + const ActionsDAGPtr & dag, const ActionsDAG::Node & node_to_cast, const String & type_name, OriginalToNewNodeMap & node_remap) @@ -173,7 +176,7 @@ const ActionsDAG::Node & addCast( /// 1. produces a result with the proper Nullable or non-Nullable UInt8 type and /// 2. makes sure that the result contains only 0 or 1 values even if the source column contains non-boolean values. const ActionsDAG::Node & addAndTrue( - ActionsDAGPtr dag, + const ActionsDAGPtr & dag, const ActionsDAG::Node & filter_node_to_normalize, OriginalToNewNodeMap & node_remap) { @@ -213,11 +216,11 @@ const ActionsDAG::Node & addAndTrue( /// 8. Add computation of the remaining outputs to the last step with the procedure similar to 4 bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, PrewhereExprInfo & prewhere) { - if (!prewhere_info || !prewhere_info->prewhere_actions) + if (!prewhere_info) return true; /// 1. List all condition nodes that are combined with AND into PREWHERE condition - const auto & condition_root = prewhere_info->prewhere_actions->findInOutputs(prewhere_info->prewhere_column_name); + const auto & condition_root = prewhere_info->prewhere_actions.findInOutputs(prewhere_info->prewhere_column_name); const bool is_conjunction = (condition_root.type == ActionsDAG::ActionType::FUNCTION && condition_root.function_base->getName() == "and"); if (!is_conjunction) return false; @@ -258,7 +261,7 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction for (size_t step_index = 0; step_index < condition_groups.size(); ++step_index) { const auto & condition_group = condition_groups[step_index]; - ActionsDAGPtr step_dag = std::make_shared(); + ActionsDAGPtr step_dag = std::make_unique(); String result_name; std::vector new_condition_nodes; @@ -299,11 +302,11 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction } } - steps.push_back({step_dag, result_name}); + steps.push_back({std::move(step_dag), result_name}); } /// 6. Find all outputs of the original DAG - auto original_outputs = prewhere_info->prewhere_actions->getOutputs(); + auto original_outputs = prewhere_info->prewhere_actions.getOutputs(); /// 7. Find all outputs that were computed in the already built DAGs, mark these nodes as outputs in the steps where they were computed /// 8. Add computation of the remaining outputs to the last step with the procedure similar to 4 NameSet all_output_names; @@ -345,11 +348,11 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction { for (size_t step_index = 0; step_index < steps.size(); ++step_index) { - const auto & step = steps[step_index]; + auto & step = steps[step_index]; PrewhereExprStep new_step { .type = PrewhereExprStep::Filter, - .actions = std::make_shared(step.actions, actions_settings), + .actions = std::make_shared(std::move(*step.actions), actions_settings), .filter_column_name = step.column_name, /// Don't remove if it's in the list of original outputs .remove_filter_column = diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 44ab12bde2a..43c40dee77d 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -113,7 +113,7 @@ void MergeTreeWhereOptimizer::optimize(SelectQueryInfo & select_query_info, cons LOG_DEBUG(log, "MergeTreeWhereOptimizer: condition \"{}\" moved to PREWHERE", select.prewhere()->formatForLogging(log_queries_cut_to_length)); } -MergeTreeWhereOptimizer::FilterActionsOptimizeResult MergeTreeWhereOptimizer::optimize(const ActionsDAGPtr & filter_dag, +MergeTreeWhereOptimizer::FilterActionsOptimizeResult MergeTreeWhereOptimizer::optimize(const ActionsDAG & filter_dag, const std::string & filter_column_name, const ContextPtr & context, bool is_final) @@ -127,7 +127,7 @@ MergeTreeWhereOptimizer::FilterActionsOptimizeResult MergeTreeWhereOptimizer::op where_optimizer_context.use_statistics = context->getSettingsRef().allow_statistics_optimize; RPNBuilderTreeContext tree_context(context); - RPNBuilderTreeNode node(&filter_dag->findInOutputs(filter_column_name), tree_context); + RPNBuilderTreeNode node(&filter_dag.findInOutputs(filter_column_name), tree_context); auto optimize_result = optimizeImpl(node, where_optimizer_context); if (!optimize_result) diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index ba6b4660924..a3d035675c6 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -52,7 +52,7 @@ public: bool fully_moved_to_prewhere = false; }; - FilterActionsOptimizeResult optimize(const ActionsDAGPtr & filter_dag, + FilterActionsOptimizeResult optimize(const ActionsDAG & filter_dag, const std::string & filter_column_name, const ContextPtr & context, bool is_final); diff --git a/src/Storages/MergeTree/PartitionPruner.cpp b/src/Storages/MergeTree/PartitionPruner.cpp index 9de7b238f57..6df7b5aa054 100644 --- a/src/Storages/MergeTree/PartitionPruner.cpp +++ b/src/Storages/MergeTree/PartitionPruner.cpp @@ -4,7 +4,7 @@ namespace DB { -PartitionPruner::PartitionPruner(const StorageMetadataPtr & metadata, ActionsDAGPtr filter_actions_dag, ContextPtr context, bool strict) +PartitionPruner::PartitionPruner(const StorageMetadataPtr & metadata, const ActionsDAG * filter_actions_dag, ContextPtr context, bool strict) : partition_key(MergeTreePartition::adjustPartitionKey(metadata, context)) , partition_condition(filter_actions_dag, context, partition_key.column_names, partition_key.expression, true /* single_point */) , useless((strict && partition_condition.isRelaxed()) || partition_condition.alwaysUnknownOrTrue()) diff --git a/src/Storages/MergeTree/PartitionPruner.h b/src/Storages/MergeTree/PartitionPruner.h index ca24559ca01..d89dfb7b245 100644 --- a/src/Storages/MergeTree/PartitionPruner.h +++ b/src/Storages/MergeTree/PartitionPruner.h @@ -13,7 +13,7 @@ namespace DB class PartitionPruner { public: - PartitionPruner(const StorageMetadataPtr & metadata, ActionsDAGPtr filter_actions_dag, ContextPtr context, bool strict = false); + PartitionPruner(const StorageMetadataPtr & metadata, const ActionsDAG * filter_actions_dag, ContextPtr context, bool strict = false); bool canBePruned(const IMergeTreeDataPart & part) const; diff --git a/src/Storages/MergeTree/RPNBuilder.cpp b/src/Storages/MergeTree/RPNBuilder.cpp index 93087b0d3fe..6e963066f39 100644 --- a/src/Storages/MergeTree/RPNBuilder.cpp +++ b/src/Storages/MergeTree/RPNBuilder.cpp @@ -399,7 +399,7 @@ size_t RPNBuilderFunctionTreeNode::getArgumentsSize() const { const auto * adaptor = typeid_cast(dag_node->function_base.get()); const auto * index_hint = typeid_cast(adaptor->getFunction().get()); - return index_hint->getActions()->getOutputs().size(); + return index_hint->getActions().getOutputs().size(); } return dag_node->children.size(); @@ -427,7 +427,7 @@ RPNBuilderTreeNode RPNBuilderFunctionTreeNode::getArgumentAt(size_t index) const { const auto & adaptor = typeid_cast(*dag_node->function_base); const auto & index_hint = typeid_cast(*adaptor.getFunction()); - return RPNBuilderTreeNode(index_hint.getActions()->getOutputs()[index], tree_context); + return RPNBuilderTreeNode(index_hint.getActions().getOutputs()[index], tree_context); } return RPNBuilderTreeNode(dag_node->children[index], tree_context); diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 774fd95ebc6..fb86d9e7603 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -36,11 +36,13 @@ namespace ErrorCodes extern const int CANNOT_ALLOCATE_MEMORY; extern const int CANNOT_MUNMAP; extern const int CANNOT_MREMAP; + extern const int CANNOT_SCHEDULE_TASK; extern const int UNEXPECTED_FILE_IN_DATA_PART; extern const int NO_FILE_IN_DATA_PART; extern const int NETWORK_ERROR; extern const int SOCKET_TIMEOUT; extern const int BROKEN_PROJECTION; + extern const int ABORTED; } @@ -85,7 +87,9 @@ bool isRetryableException(std::exception_ptr exception_ptr) { return isNotEnoughMemoryErrorCode(e.code()) || e.code() == ErrorCodes::NETWORK_ERROR - || e.code() == ErrorCodes::SOCKET_TIMEOUT; + || e.code() == ErrorCodes::SOCKET_TIMEOUT + || e.code() == ErrorCodes::CANNOT_SCHEDULE_TASK + || e.code() == ErrorCodes::ABORTED; } catch (const Poco::Net::NetException &) { @@ -329,16 +333,21 @@ static IMergeTreeDataPart::Checksums checkDataPart( projections_on_disk.erase(projection_file); } - if (throw_on_broken_projection && !broken_projections_message.empty()) + if (throw_on_broken_projection) { - throw Exception(ErrorCodes::BROKEN_PROJECTION, "{}", broken_projections_message); - } + if (!broken_projections_message.empty()) + { + throw Exception(ErrorCodes::BROKEN_PROJECTION, "{}", broken_projections_message); + } - if (require_checksums && !projections_on_disk.empty()) - { - throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART, - "Found unexpected projection directories: {}", - fmt::join(projections_on_disk, ",")); + /// This one is actually not broken, just redundant files on disk which + /// MergeTree will never use. + if (require_checksums && !projections_on_disk.empty()) + { + throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART, + "Found unexpected projection directories: {}", + fmt::join(projections_on_disk, ",")); + } } if (is_cancelled()) diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 8f0e2d76473..9d728c3395f 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -49,6 +49,7 @@ StorageNATS::StorageNATS( const StorageID & table_id_, ContextPtr context_, const ColumnsDescription & columns_, + const String & comment, std::unique_ptr nats_settings_, LoadingStrictnessLevel mode) : IStorage(table_id_) @@ -87,6 +88,7 @@ StorageNATS::StorageNATS( StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); + storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); setVirtuals(createVirtuals(nats_settings->nats_handle_error_mode)); @@ -760,7 +762,7 @@ void registerStorageNATS(StorageFactory & factory) if (!nats_settings->nats_subjects.changed) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "You must specify `nats_subjects` setting"); - return std::make_shared(args.table_id, args.getContext(), args.columns, std::move(nats_settings), args.mode); + return std::make_shared(args.table_id, args.getContext(), args.columns, args.comment, std::move(nats_settings), args.mode); }; factory.registerStorage("NATS", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); diff --git a/src/Storages/NATS/StorageNATS.h b/src/Storages/NATS/StorageNATS.h index 41d77acfde6..5fca8cb0163 100644 --- a/src/Storages/NATS/StorageNATS.h +++ b/src/Storages/NATS/StorageNATS.h @@ -23,6 +23,7 @@ public: const StorageID & table_id_, ContextPtr context_, const ColumnsDescription & columns_, + const String & comment, std::unique_ptr nats_settings_, LoadingStrictnessLevel mode); diff --git a/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.cpp index 21df7e35284..3bbc4e8a2ea 100644 --- a/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.cpp @@ -91,9 +91,9 @@ void AsynchronousReadBufferFromHDFS::prefetch(Priority priority) } -size_t AsynchronousReadBufferFromHDFS::getFileSize() +std::optional AsynchronousReadBufferFromHDFS::tryGetFileSize() { - return impl->getFileSize(); + return impl->tryGetFileSize(); } String AsynchronousReadBufferFromHDFS::getFileName() const diff --git a/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.h b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.h index 5aef92315a4..9846d74453b 100644 --- a/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.h +++ b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.h @@ -35,7 +35,7 @@ public: void prefetch(Priority priority) override; - size_t getFileSize() override; + std::optional tryGetFileSize() override; String getFileName() const override; diff --git a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp index be339d021dc..bf6f9db722c 100644 --- a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp @@ -31,7 +31,7 @@ namespace ErrorCodes } -struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory +struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory, public WithFileSize { String hdfs_uri; String hdfs_file_path; @@ -90,7 +90,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory tryGetFileSize() override { return file_size; } @@ -191,9 +191,9 @@ ReadBufferFromHDFS::ReadBufferFromHDFS( ReadBufferFromHDFS::~ReadBufferFromHDFS() = default; -size_t ReadBufferFromHDFS::getFileSize() +std::optional ReadBufferFromHDFS::tryGetFileSize() { - return impl->getFileSize(); + return impl->tryGetFileSize(); } bool ReadBufferFromHDFS::nextImpl() diff --git a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h index d9671e7e445..5363f07967b 100644 --- a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h +++ b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h @@ -40,7 +40,7 @@ public: off_t getPosition() override; - size_t getFileSize() override; + std::optional tryGetFileSize() override; size_t getFileOffsetOfBufferEnd() const override; diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index d739f7a28c9..df78f128c80 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -163,7 +163,9 @@ ReadBufferIterator::Data ReadBufferIterator::next() { for (const auto & object_info : read_keys) { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(object_info->getFileName())) + auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(object_info->getFileName()); + /// Use this format only if we have a schema reader for it. + if (format_from_file_name && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_file_name)) { format = format_from_file_name; break; @@ -221,7 +223,9 @@ ReadBufferIterator::Data ReadBufferIterator::next() { for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName())) + auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName()); + /// Use this format only if we have a schema reader for it. + if (format_from_file_name && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_file_name)) { format = format_from_file_name; break; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 086482e330a..e760098f10f 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -77,7 +77,7 @@ StorageObjectStorageSource::~StorageObjectStorageSource() create_reader_pool->wait(); } -void StorageObjectStorageSource::setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) +void StorageObjectStorageSource::setKeyCondition(const std::optional & filter_actions_dag, ContextPtr context_) { setKeyConditionImpl(filter_actions_dag, context_, read_from_format_info.format_header); } @@ -139,7 +139,10 @@ std::shared_ptr StorageObjectStorageSourc paths.reserve(keys.size()); for (const auto & key : keys) paths.push_back(fs::path(configuration->getNamespace()) / key); - VirtualColumnUtils::filterByPathOrFile(keys, paths, filter_dag, virtual_columns, local_context); + + VirtualColumnUtils::buildSetsForDAG(*filter_dag, local_context); + auto actions = std::make_shared(std::move(*filter_dag)); + VirtualColumnUtils::filterByPathOrFile(keys, paths, actions, virtual_columns); copy_configuration->setPaths(keys); } @@ -506,7 +509,11 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( } recursive = key_with_globs == "/**"; - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); + if (auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns)) + { + VirtualColumnUtils::buildSetsForDAG(*filter_dag, getContext()); + filter_expr = std::make_shared(std::move(*filter_dag)); + } } else { @@ -570,14 +577,14 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::ne ++it; } - if (filter_dag) + if (filter_expr) { std::vector paths; paths.reserve(new_batch.size()); for (const auto & object_info : new_batch) paths.push_back(getUniqueStoragePathIdentifier(*configuration, *object_info, false)); - VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); + VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_expr, virtual_columns); LOG_TEST(logger, "Filtered files: {} -> {}", paths.size(), new_batch.size()); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 271b38fa75c..e466621e1e1 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -46,7 +46,7 @@ public: String getName() const override { return name; } - void setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) override; + void setKeyCondition(const std::optional & filter_actions_dag, ContextPtr context_) override; Chunk generate() override; @@ -208,7 +208,7 @@ private: ObjectInfos object_infos; ObjectInfos * read_keys; - ActionsDAGPtr filter_dag; + ExpressionActionsPtr filter_expr; ObjectStorageIteratorPtr object_storage_iterator; bool recursive{false}; std::vector expanded_keys; diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 14b828e7268..f51a7a913b8 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -36,6 +36,7 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; + extern const int BAD_QUERY_PARAMETER; extern const int QUERY_NOT_ALLOWED; } @@ -150,7 +151,7 @@ StorageObjectStorageQueue::StorageObjectStorageQueue( } else if (!configuration->isPathWithGlobs()) { - throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "ObjectStorageQueue url must either end with '/' or contain globs"); + throw Exception(ErrorCodes::BAD_QUERY_PARAMETER, "ObjectStorageQueue url must either end with '/' or contain globs"); } checkAndAdjustSettings(*queue_settings, engine_args, mode > LoadingStrictnessLevel::CREATE, log); diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index a904b29e12f..a9778d5d04d 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -10,7 +10,6 @@ #include #include -#include #include #include #include @@ -22,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -295,7 +295,7 @@ std::shared_ptr StorageMaterializedPostgreSQL::getMaterial auto column_declaration = std::make_shared(); column_declaration->name = std::move(name); - column_declaration->type = makeASTFunction(type); + column_declaration->type = makeASTDataType(type); column_declaration->default_specifier = "MATERIALIZED"; column_declaration->default_expression = std::make_shared(default_value); @@ -312,17 +312,17 @@ ASTPtr StorageMaterializedPostgreSQL::getColumnDeclaration(const DataTypePtr & d WhichDataType which(data_type); if (which.isNullable()) - return makeASTFunction("Nullable", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType())); + return makeASTDataType("Nullable", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType())); if (which.isArray()) - return makeASTFunction("Array", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType())); + return makeASTDataType("Array", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType())); /// getName() for decimal returns 'Decimal(precision, scale)', will get an error with it if (which.isDecimal()) { auto make_decimal_expression = [&](std::string type_name) { - auto ast_expression = std::make_shared(); + auto ast_expression = std::make_shared(); ast_expression->name = type_name; ast_expression->arguments = std::make_shared(); @@ -346,7 +346,7 @@ ASTPtr StorageMaterializedPostgreSQL::getColumnDeclaration(const DataTypePtr & d if (which.isDateTime64()) { - auto ast_expression = std::make_shared(); + auto ast_expression = std::make_shared(); ast_expression->name = "DateTime64"; ast_expression->arguments = std::make_shared(); @@ -354,7 +354,7 @@ ASTPtr StorageMaterializedPostgreSQL::getColumnDeclaration(const DataTypePtr & d return ast_expression; } - return std::make_shared(data_type->getName()); + return makeASTDataType(data_type->getName()); } @@ -571,6 +571,7 @@ void registerStorageMaterializedPostgreSQL(StorageFactory & factory) StorageInMemoryMetadata metadata; metadata.setColumns(args.columns); metadata.setConstraints(args.constraints); + metadata.setComment(args.comment); if (args.mode <= LoadingStrictnessLevel::CREATE && !args.getLocalContext()->getSettingsRef().allow_experimental_materialized_postgresql_table) diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index f3d2aff68c8..9e3c40071b5 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -70,6 +70,7 @@ StorageRabbitMQ::StorageRabbitMQ( const StorageID & table_id_, ContextPtr context_, const ColumnsDescription & columns_, + const String & comment, std::unique_ptr rabbitmq_settings_, LoadingStrictnessLevel mode) : IStorage(table_id_) @@ -145,6 +146,7 @@ StorageRabbitMQ::StorageRabbitMQ( StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); + storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); setVirtuals(createVirtuals(rabbitmq_settings->rabbitmq_handle_error_mode)); @@ -1288,7 +1290,7 @@ void registerStorageRabbitMQ(StorageFactory & factory) if (!rabbitmq_settings->rabbitmq_format.changed) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "You must specify `rabbitmq_format` setting"); - return std::make_shared(args.table_id, args.getContext(), args.columns, std::move(rabbitmq_settings), args.mode); + return std::make_shared(args.table_id, args.getContext(), args.columns, args.comment, std::move(rabbitmq_settings), args.mode); }; factory.registerStorage("RabbitMQ", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index b8fab5825e4..fed80a4357b 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -26,6 +26,7 @@ public: const StorageID & table_id_, ContextPtr context_, const ColumnsDescription & columns_, + const String & comment, std::unique_ptr rabbitmq_settings_, LoadingStrictnessLevel mode); diff --git a/src/Storages/ReadFinalForExternalReplicaStorage.cpp b/src/Storages/ReadFinalForExternalReplicaStorage.cpp index 5070b4a17d8..add12413853 100644 --- a/src/Storages/ReadFinalForExternalReplicaStorage.cpp +++ b/src/Storages/ReadFinalForExternalReplicaStorage.cpp @@ -80,7 +80,7 @@ void readFinalFromNestedStorage( auto step = std::make_unique( query_plan.getCurrentDataStream(), - actions, + std::move(actions), filter_column_name, false); diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 409703c84c6..fafc72da04e 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -691,6 +691,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) StorageInMemoryMetadata metadata; metadata.setColumns(args.columns); metadata.setConstraints(args.constraints); + metadata.setComment(args.comment); if (!args.storage_def->primary_key) throw Exception(ErrorCodes::BAD_ARGUMENTS, "StorageEmbeddedRocksDB must require one column in primary key"); diff --git a/src/Storages/SelectQueryInfo.cpp b/src/Storages/SelectQueryInfo.cpp index d59ccf0dfaf..c9c96ed5837 100644 --- a/src/Storages/SelectQueryInfo.cpp +++ b/src/Storages/SelectQueryInfo.cpp @@ -18,7 +18,7 @@ std::unordered_map SelectQueryInfo::buildNod std::unordered_map node_name_to_input_node_column; if (planner_context) { - const auto & table_expression_data = planner_context->getTableExpressionDataOrThrow(table_expression); + auto & table_expression_data = planner_context->getTableExpressionDataOrThrow(table_expression); const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); for (const auto & [column_identifier, column_name] : table_expression_data.getColumnIdentifierToColumnName()) { diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 848aeb59dad..7ad6a733c6f 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -18,9 +18,6 @@ namespace DB class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; -class ActionsDAG; -using ActionsDAGPtr = std::shared_ptr; - struct PrewhereInfo; using PrewhereInfoPtr = std::shared_ptr; @@ -46,9 +43,9 @@ struct PrewhereInfo { /// Actions for row level security filter. Applied separately before prewhere_actions. /// This actions are separate because prewhere condition should not be executed over filtered rows. - ActionsDAGPtr row_level_filter; + std::optional row_level_filter; /// Actions which are executed on block in order to get filter column for prewhere step. - ActionsDAGPtr prewhere_actions; + ActionsDAG prewhere_actions; String row_level_column_name; String prewhere_column_name; bool remove_prewhere_column = false; @@ -56,7 +53,7 @@ struct PrewhereInfo bool generated_by_optimizer = false; PrewhereInfo() = default; - explicit PrewhereInfo(ActionsDAGPtr prewhere_actions_, String prewhere_column_name_) + explicit PrewhereInfo(ActionsDAG prewhere_actions_, String prewhere_column_name_) : prewhere_actions(std::move(prewhere_actions_)), prewhere_column_name(std::move(prewhere_column_name_)) {} std::string dump() const; @@ -68,8 +65,7 @@ struct PrewhereInfo if (row_level_filter) prewhere_info->row_level_filter = row_level_filter->clone(); - if (prewhere_actions) - prewhere_info->prewhere_actions = prewhere_actions->clone(); + prewhere_info->prewhere_actions = prewhere_actions.clone(); prewhere_info->row_level_column_name = row_level_column_name; prewhere_info->prewhere_column_name = prewhere_column_name; @@ -93,7 +89,7 @@ struct FilterInfo /// Same as FilterInfo, but with ActionsDAG. struct FilterDAGInfo { - ActionsDAGPtr actions; + ActionsDAG actions; String column_name; bool do_remove_column = false; @@ -202,7 +198,7 @@ struct SelectQueryInfo ASTPtr parallel_replica_custom_key_ast; /// Filter actions dag for current storage - ActionsDAGPtr filter_actions_dag; + std::shared_ptr filter_actions_dag; ReadInOrderOptimizerPtr order_optimizer; /// Can be modified while reading from storage diff --git a/src/Storages/Statistics/ConditionSelectivityEstimator.cpp b/src/Storages/Statistics/ConditionSelectivityEstimator.cpp index 757136fdf42..57dff958b9a 100644 --- a/src/Storages/Statistics/ConditionSelectivityEstimator.cpp +++ b/src/Storages/Statistics/ConditionSelectivityEstimator.cpp @@ -16,7 +16,7 @@ void ConditionSelectivityEstimator::ColumnSelectivityEstimator::merge(String par part_statistics[part_name] = stats; } -Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(Float64 val, Float64 rows) const +Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(const Field & val, Float64 rows) const { if (part_statistics.empty()) return default_normal_cond_factor * rows; @@ -30,16 +30,19 @@ Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess( return result * rows / part_rows; } -Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateGreater(Float64 val, Float64 rows) const +Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateGreater(const Field & val, Float64 rows) const { return rows - estimateLess(val, rows); } -Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateEqual(Float64 val, Float64 rows) const +Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateEqual(const Field & val, Float64 rows) const { if (part_statistics.empty()) { - if (val < - threshold || val > threshold) + auto float_val = StatisticsUtils::tryConvertToFloat64(val); + if (!float_val) + return default_unknown_cond_factor * rows; + else if (float_val.value() < - threshold || float_val.value() > threshold) return default_normal_cond_factor * rows; else return default_good_cond_factor * rows; @@ -87,7 +90,7 @@ static std::pair tryToExtractSingleColumn(const RPNBuilderTreeNod return result; } -std::pair ConditionSelectivityEstimator::extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const +std::pair ConditionSelectivityEstimator::extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const { if (!node.isFunction()) return {}; @@ -123,48 +126,35 @@ std::pair ConditionSelectivityEstimator::extractBinaryOp(const DataTypePtr output_type; if (!constant_node->tryGetConstant(output_value, output_type)) return {}; - - const auto type = output_value.getType(); - Float64 value; - if (type == Field::Types::Int64) - value = output_value.get(); - else if (type == Field::Types::UInt64) - value = output_value.get(); - else if (type == Field::Types::Float64) - value = output_value.get(); - else - return {}; - return std::make_pair(function_name, value); + return std::make_pair(function_name, output_value); } Float64 ConditionSelectivityEstimator::estimateRowCount(const RPNBuilderTreeNode & node) const { auto result = tryToExtractSingleColumn(node); if (result.second != 1) - { - return default_unknown_cond_factor; - } + return default_unknown_cond_factor * total_rows; + String col = result.first; auto it = column_estimators.find(col); /// If there the estimator of the column is not found or there are no data at all, /// we use dummy estimation. - bool dummy = total_rows == 0; + bool dummy = false; ColumnSelectivityEstimator estimator; if (it != column_estimators.end()) - { estimator = it->second; - } else - { dummy = true; - } + auto [op, val] = extractBinaryOp(node, col); + if (op == "equals") { if (dummy) { - if (val < - threshold || val > threshold) + auto float_val = StatisticsUtils::tryConvertToFloat64(val); + if (!float_val || (float_val < - threshold || float_val > threshold)) return default_normal_cond_factor * total_rows; else return default_good_cond_factor * total_rows; @@ -187,13 +177,8 @@ Float64 ConditionSelectivityEstimator::estimateRowCount(const RPNBuilderTreeNode return default_unknown_cond_factor * total_rows; } -void ConditionSelectivityEstimator::merge(String part_name, UInt64 part_rows, ColumnStatisticsPtr column_stat) +void ConditionSelectivityEstimator::merge(String part_name, ColumnStatisticsPtr column_stat) { - if (!part_names.contains(part_name)) - { - total_rows += part_rows; - part_names.insert(part_name); - } if (column_stat != nullptr) column_estimators[column_stat->columnName()].merge(part_name, column_stat); } diff --git a/src/Storages/Statistics/ConditionSelectivityEstimator.h b/src/Storages/Statistics/ConditionSelectivityEstimator.h index f0599742276..ce7fdd12e92 100644 --- a/src/Storages/Statistics/ConditionSelectivityEstimator.h +++ b/src/Storages/Statistics/ConditionSelectivityEstimator.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB { @@ -10,6 +11,14 @@ class RPNBuilderTreeNode; /// It estimates the selectivity of a condition. class ConditionSelectivityEstimator { +public: + /// TODO: Support the condition consists of CNF/DNF like (cond1 and cond2) or (cond3) ... + /// Right now we only support simple condition like col = val / col < val + Float64 estimateRowCount(const RPNBuilderTreeNode & node) const; + + void merge(String part_name, ColumnStatisticsPtr column_stat); + void addRows(UInt64 part_rows) { total_rows += part_rows; } + private: friend class ColumnStatistics; struct ColumnSelectivityEstimator @@ -20,13 +29,15 @@ private: void merge(String part_name, ColumnStatisticsPtr stats); - Float64 estimateLess(Float64 val, Float64 rows) const; + Float64 estimateLess(const Field & val, Float64 rows) const; - Float64 estimateGreater(Float64 val, Float64 rows) const; + Float64 estimateGreater(const Field & val, Float64 rows) const; - Float64 estimateEqual(Float64 val, Float64 rows) const; + Float64 estimateEqual(const Field & val, Float64 rows) const; }; + std::pair extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const; + static constexpr auto default_good_cond_factor = 0.1; static constexpr auto default_normal_cond_factor = 0.5; static constexpr auto default_unknown_cond_factor = 1.0; @@ -35,16 +46,7 @@ private: static constexpr auto threshold = 2; UInt64 total_rows = 0; - std::set part_names; std::map column_estimators; - std::pair extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const; - -public: - /// TODO: Support the condition consists of CNF/DNF like (cond1 and cond2) or (cond3) ... - /// Right now we only support simple condition like col = val / col < val - Float64 estimateRowCount(const RPNBuilderTreeNode & node) const; - - void merge(String part_name, UInt64 part_rows, ColumnStatisticsPtr column_stat); }; } diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index 28e75c6d244..ade3326288a 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -1,15 +1,18 @@ #include +#include +#include +#include #include +#include #include #include #include -#include -#include -#include #include #include +#include "config.h" /// USE_DATASKETCHES + namespace DB { @@ -24,6 +27,36 @@ enum StatisticsFileVersion : UInt16 V0 = 0, }; +std::optional StatisticsUtils::tryConvertToFloat64(const Field & field) +{ + switch (field.getType()) + { + case Field::Types::Int64: + return field.get(); + case Field::Types::UInt64: + return field.get(); + case Field::Types::Float64: + return field.get(); + case Field::Types::Int128: + return field.get(); + case Field::Types::UInt128: + return field.get(); + case Field::Types::Int256: + return field.get(); + case Field::Types::UInt256: + return field.get(); + default: + return {}; + } +} + +std::optional StatisticsUtils::tryConvertToString(const DB::Field & field) +{ + if (field.getType() == Field::Types::String) + return field.get(); + return {}; +} + IStatistics::IStatistics(const SingleStatisticsDescription & stat_) : stat(stat_) { @@ -46,12 +79,12 @@ UInt64 IStatistics::estimateCardinality() const throw Exception(ErrorCodes::LOGICAL_ERROR, "Cardinality estimation is not implemented for this type of statistics"); } -Float64 IStatistics::estimateEqual(Float64 /*val*/) const +Float64 IStatistics::estimateEqual(const Field & /*val*/) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Equality estimation is not implemented for this type of statistics"); } -Float64 IStatistics::estimateLess(Float64 /*val*/) const +Float64 IStatistics::estimateLess(const Field & /*val*/) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Less-than estimation is not implemented for this type of statistics"); } @@ -66,27 +99,32 @@ Float64 IStatistics::estimateLess(Float64 /*val*/) const /// For that reason, all estimation are performed in a central place (here), and we don't simply pass the predicate to the first statistics /// object that supports it natively. -Float64 ColumnStatistics::estimateLess(Float64 val) const +Float64 ColumnStatistics::estimateLess(const Field & val) const { if (stats.contains(StatisticsType::TDigest)) return stats.at(StatisticsType::TDigest)->estimateLess(val); return rows * ConditionSelectivityEstimator::default_normal_cond_factor; } -Float64 ColumnStatistics::estimateGreater(Float64 val) const +Float64 ColumnStatistics::estimateGreater(const Field & val) const { return rows - estimateLess(val); } -Float64 ColumnStatistics::estimateEqual(Float64 val) const +Float64 ColumnStatistics::estimateEqual(const Field & val) const { - if (stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest)) + auto float_val = StatisticsUtils::tryConvertToFloat64(val); + if (float_val.has_value() && stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest)) { /// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) for every bucket. if (stats.at(StatisticsType::Uniq)->estimateCardinality() < 2048) return stats.at(StatisticsType::TDigest)->estimateEqual(val); } - if (val < - ConditionSelectivityEstimator::threshold || val > ConditionSelectivityEstimator::threshold) +#if USE_DATASKETCHES + if (stats.contains(StatisticsType::CountMinSketch)) + return stats.at(StatisticsType::CountMinSketch)->estimateEqual(val); +#endif + if (!float_val.has_value() && (float_val < - ConditionSelectivityEstimator::threshold || float_val > ConditionSelectivityEstimator::threshold)) return rows * ConditionSelectivityEstimator::default_normal_cond_factor; else return rows * ConditionSelectivityEstimator::default_good_cond_factor; @@ -166,11 +204,16 @@ void MergeTreeStatisticsFactory::registerValidator(StatisticsType stats_type, Va MergeTreeStatisticsFactory::MergeTreeStatisticsFactory() { - registerValidator(StatisticsType::TDigest, TDigestValidator); - registerCreator(StatisticsType::TDigest, TDigestCreator); + registerValidator(StatisticsType::TDigest, tdigestValidator); + registerCreator(StatisticsType::TDigest, tdigestCreator); - registerValidator(StatisticsType::Uniq, UniqValidator); - registerCreator(StatisticsType::Uniq, UniqCreator); + registerValidator(StatisticsType::Uniq, uniqValidator); + registerCreator(StatisticsType::Uniq, uniqCreator); + +#if USE_DATASKETCHES + registerValidator(StatisticsType::CountMinSketch, countMinSketchValidator); + registerCreator(StatisticsType::CountMinSketch, countMinSketchCreator); +#endif } MergeTreeStatisticsFactory & MergeTreeStatisticsFactory::instance() @@ -197,7 +240,7 @@ ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescri { auto it = creators.find(type); if (it == creators.end()) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq'", type); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq' and 'count_min'", type); auto stat_ptr = (it->second)(desc, stats.data_type); column_stat->stats[type] = stat_ptr; } diff --git a/src/Storages/Statistics/Statistics.h b/src/Storages/Statistics/Statistics.h index d4364075d1c..16f0c67eabd 100644 --- a/src/Storages/Statistics/Statistics.h +++ b/src/Storages/Statistics/Statistics.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -13,6 +14,14 @@ namespace DB constexpr auto STATS_FILE_PREFIX = "statistics_"; constexpr auto STATS_FILE_SUFFIX = ".stats"; + +struct StatisticsUtils +{ + /// Returns std::nullopt if input Field cannot be converted to a concrete value + static std::optional tryConvertToFloat64(const Field & field); + static std::optional tryConvertToString(const Field & field); +}; + /// Statistics describe properties of the values in the column, /// e.g. how many unique values exist, /// what are the N most frequent values, @@ -34,8 +43,8 @@ public: /// Per-value estimations. /// Throws if the statistics object is not able to do a meaningful estimation. - virtual Float64 estimateEqual(Float64 val) const; /// cardinality of val in the column - virtual Float64 estimateLess(Float64 val) const; /// summarized cardinality of values < val in the column + virtual Float64 estimateEqual(const Field & val) const; /// cardinality of val in the column + virtual Float64 estimateLess(const Field & val) const; /// summarized cardinality of values < val in the column protected: SingleStatisticsDescription stat; @@ -58,9 +67,9 @@ public: void update(const ColumnPtr & column); - Float64 estimateLess(Float64 val) const; - Float64 estimateGreater(Float64 val) const; - Float64 estimateEqual(Float64 val) const; + Float64 estimateLess(const Field & val) const; + Float64 estimateGreater(const Field & val) const; + Float64 estimateEqual(const Field & val) const; private: friend class MergeTreeStatisticsFactory; diff --git a/src/Storages/Statistics/StatisticsCountMinSketch.cpp b/src/Storages/Statistics/StatisticsCountMinSketch.cpp new file mode 100644 index 00000000000..e69bbc1515b --- /dev/null +++ b/src/Storages/Statistics/StatisticsCountMinSketch.cpp @@ -0,0 +1,102 @@ + +#include +#include +#include +#include +#include +#include + +#if USE_DATASKETCHES + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +extern const int ILLEGAL_STATISTICS; +} + +/// Constants chosen based on rolling dices. +/// The values provides: +/// 1. an error tolerance of 0.1% (ε = 0.001) +/// 2. a confidence level of 99.9% (δ = 0.001). +/// And sketch the size is 152kb. +static constexpr auto num_hashes = 7uz; +static constexpr auto num_buckets = 2718uz; + +StatisticsCountMinSketch::StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_) + : IStatistics(stat_) + , sketch(num_hashes, num_buckets) + , data_type(data_type_) +{ +} + +Float64 StatisticsCountMinSketch::estimateEqual(const Field & val) const +{ + /// Try to convert field to data_type. Converting string to proper data types such as: number, date, datetime, IPv4, Decimal etc. + /// Return null if val larger than the range of data_type + /// + /// For example: if data_type is Int32: + /// 1. For 1.0, 1, '1', return Field(1) + /// 2. For 1.1, max_value_int64, return null + Field val_converted = convertFieldToType(val, *data_type); + if (val_converted.isNull()) + return 0; + + if (data_type->isValueRepresentedByNumber()) + return sketch.get_estimate(&val_converted, data_type->getSizeOfValueInMemory()); + + if (isStringOrFixedString(data_type)) + return sketch.get_estimate(val.get()); + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'count_min' does not support estimate data type of {}", data_type->getName()); +} + +void StatisticsCountMinSketch::update(const ColumnPtr & column) +{ + for (size_t row = 0; row < column->size(); ++row) + { + if (column->isNullAt(row)) + continue; + auto data = column->getDataAt(row); + sketch.update(data.data, data.size, 1); + } +} + +void StatisticsCountMinSketch::serialize(WriteBuffer & buf) +{ + Sketch::vector_bytes bytes = sketch.serialize(); + writeIntBinary(static_cast(bytes.size()), buf); + buf.write(reinterpret_cast(bytes.data()), bytes.size()); +} + +void StatisticsCountMinSketch::deserialize(ReadBuffer & buf) +{ + UInt64 size; + readIntBinary(size, buf); + + Sketch::vector_bytes bytes; + bytes.resize(size); /// To avoid 'container-overflow' in AddressSanitizer checking + buf.readStrict(reinterpret_cast(bytes.data()), size); + + sketch = Sketch::deserialize(bytes.data(), size); +} + + +void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +{ + data_type = removeNullable(data_type); + data_type = removeLowCardinalityAndNullable(data_type); + if (!data_type->isValueRepresentedByNumber() && !isStringOrFixedString(data_type)) + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'count_min' does not support type {}", data_type->getName()); +} + +StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) +{ + return std::make_shared(stat, data_type); +} + +} + +#endif diff --git a/src/Storages/Statistics/StatisticsCountMinSketch.h b/src/Storages/Statistics/StatisticsCountMinSketch.h new file mode 100644 index 00000000000..6c8b74f8c35 --- /dev/null +++ b/src/Storages/Statistics/StatisticsCountMinSketch.h @@ -0,0 +1,39 @@ +#pragma once + +#include + +#include "config.h" + +#if USE_DATASKETCHES + +#include + +namespace DB +{ + +class StatisticsCountMinSketch : public IStatistics +{ +public: + StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_); + + Float64 estimateEqual(const Field & val) const override; + + void update(const ColumnPtr & column) override; + + void serialize(WriteBuffer & buf) override; + void deserialize(ReadBuffer & buf) override; + +private: + using Sketch = datasketches::count_min_sketch; + Sketch sketch; + + DataTypePtr data_type; +}; + + +void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr); + +} + +#endif diff --git a/src/Storages/Statistics/StatisticsTDigest.cpp b/src/Storages/Statistics/StatisticsTDigest.cpp index 0747197370c..66150e00fdb 100644 --- a/src/Storages/Statistics/StatisticsTDigest.cpp +++ b/src/Storages/Statistics/StatisticsTDigest.cpp @@ -1,11 +1,13 @@ #include #include +#include namespace DB { namespace ErrorCodes { - extern const int ILLEGAL_STATISTICS; +extern const int ILLEGAL_STATISTICS; +extern const int LOGICAL_ERROR; } StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_) @@ -16,12 +18,16 @@ StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_) void StatisticsTDigest::update(const ColumnPtr & column) { size_t rows = column->size(); - for (size_t row = 0; row < rows; ++row) { - /// TODO: support more types. - Float64 value = column->getFloat64(row); - t_digest.add(value, 1); + Field field; + column->get(row, field); + + if (field.isNull()) + continue; + + if (auto field_as_float = StatisticsUtils::tryConvertToFloat64(field)) + t_digest.add(*field_as_float, 1); } } @@ -35,24 +41,31 @@ void StatisticsTDigest::deserialize(ReadBuffer & buf) t_digest.deserialize(buf); } -Float64 StatisticsTDigest::estimateLess(Float64 val) const +Float64 StatisticsTDigest::estimateLess(const Field & val) const { - return t_digest.getCountLessThan(val); + auto val_as_float = StatisticsUtils::tryConvertToFloat64(val); + if (val_as_float) + return t_digest.getCountLessThan(*val_as_float); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName()); } -Float64 StatisticsTDigest::estimateEqual(Float64 val) const +Float64 StatisticsTDigest::estimateEqual(const Field & val) const { - return t_digest.getCountEqual(val); + auto val_as_float = StatisticsUtils::tryConvertToFloat64(val); + if (val_as_float) + return t_digest.getCountEqual(*val_as_float); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName()); } -void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) { data_type = removeNullable(data_type); + data_type = removeLowCardinalityAndNullable(data_type); if (!data_type->isValueRepresentedByNumber()) throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName()); } -StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr) +StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr) { return std::make_shared(stat); } diff --git a/src/Storages/Statistics/StatisticsTDigest.h b/src/Storages/Statistics/StatisticsTDigest.h index d3a3bf115ee..614973e5d8b 100644 --- a/src/Storages/Statistics/StatisticsTDigest.h +++ b/src/Storages/Statistics/StatisticsTDigest.h @@ -16,14 +16,14 @@ public: void serialize(WriteBuffer & buf) override; void deserialize(ReadBuffer & buf) override; - Float64 estimateLess(Float64 val) const override; - Float64 estimateEqual(Float64 val) const override; + Float64 estimateLess(const Field & val) const override; + Float64 estimateEqual(const Field & val) const override; private: QuantileTDigest t_digest; }; -void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type); -StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr); +void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr); } diff --git a/src/Storages/Statistics/StatisticsUniq.cpp b/src/Storages/Statistics/StatisticsUniq.cpp index bf9a40ea8cb..8f60ffcf0b5 100644 --- a/src/Storages/Statistics/StatisticsUniq.cpp +++ b/src/Storages/Statistics/StatisticsUniq.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB { @@ -51,14 +52,15 @@ UInt64 StatisticsUniq::estimateCardinality() const return column->getUInt(0); } -void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) { data_type = removeNullable(data_type); + data_type = removeLowCardinalityAndNullable(data_type); if (!data_type->isValueRepresentedByNumber()) throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName()); } -StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) +StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) { return std::make_shared(stat, data_type); } diff --git a/src/Storages/Statistics/StatisticsUniq.h b/src/Storages/Statistics/StatisticsUniq.h index 5290585bd94..faabde8d47c 100644 --- a/src/Storages/Statistics/StatisticsUniq.h +++ b/src/Storages/Statistics/StatisticsUniq.h @@ -27,7 +27,7 @@ private: }; -void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type); -StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); +void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); } diff --git a/src/Storages/Statistics/tests/gtest_stats.cpp b/src/Storages/Statistics/tests/gtest_stats.cpp index c3c14632ba1..e55c52c49f3 100644 --- a/src/Storages/Statistics/tests/gtest_stats.cpp +++ b/src/Storages/Statistics/tests/gtest_stats.cpp @@ -1,6 +1,10 @@ #include #include +#include +#include + +using namespace DB; TEST(Statistics, TDigestLessThan) { @@ -39,6 +43,4 @@ TEST(Statistics, TDigestLessThan) std::reverse(data.begin(), data.end()); test_less_than(data, {-1, 1e9, 50000.0, 3000.0, 30.0}, {0, 100000, 50000, 3000, 30}, {0, 0, 0.001, 0.001, 0.001}); - - } diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp index f10fb78f933..9c5fd3604b2 100644 --- a/src/Storages/StatisticsDescription.cpp +++ b/src/Storages/StatisticsDescription.cpp @@ -1,19 +1,14 @@ #include -#include #include #include #include #include -#include -#include #include #include #include -#include #include -#include namespace DB { @@ -54,7 +49,9 @@ static StatisticsType stringToStatisticsType(String type) return StatisticsType::TDigest; if (type == "uniq") return StatisticsType::Uniq; - throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistics type: {}. Supported statistics types are `tdigest` and `uniq`.", type); + if (type == "count_min") + return StatisticsType::CountMinSketch; + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistics type: {}. Supported statistics types are 'tdigest', 'uniq' and 'count_min'.", type); } String SingleStatisticsDescription::getTypeName() const @@ -65,8 +62,10 @@ String SingleStatisticsDescription::getTypeName() const return "TDigest"; case StatisticsType::Uniq: return "Uniq"; + case StatisticsType::CountMinSketch: + return "count_min"; default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistics type: {}. Supported statistics types are `tdigest` and `uniq`.", type); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistics type: {}. Supported statistics types are 'tdigest', 'uniq' and 'count_min'.", type); } } @@ -99,10 +98,9 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe chassert(merging_column_type); if (column_name.empty()) - { column_name = merging_column_name; - data_type = merging_column_type; - } + + data_type = merging_column_type; for (const auto & [stats_type, stats_desc]: other.types_to_desc) { @@ -121,6 +119,7 @@ void ColumnStatisticsDescription::assign(const ColumnStatisticsDescription & oth throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot assign statistics from column {} to {}", column_name, other.column_name); types_to_desc = other.types_to_desc; + data_type = other.data_type; } void ColumnStatisticsDescription::clear() @@ -159,6 +158,7 @@ std::vector ColumnStatisticsDescription::fromAST(co const auto & column = columns.getPhysical(physical_column_name); stats.column_name = column.name; + stats.data_type = column.type; stats.types_to_desc = statistics_types; result.push_back(stats); } diff --git a/src/Storages/StatisticsDescription.h b/src/Storages/StatisticsDescription.h index 4862fb79d45..03b8fb0d583 100644 --- a/src/Storages/StatisticsDescription.h +++ b/src/Storages/StatisticsDescription.h @@ -13,6 +13,7 @@ enum class StatisticsType : UInt8 { TDigest = 0, Uniq = 1, + CountMinSketch = 2, Max = 63, }; diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 1bcc602f168..04e6d6676d1 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -313,19 +313,18 @@ void StorageBuffer::read( if (src_table_query_info.prewhere_info->row_level_filter) { src_table_query_info.prewhere_info->row_level_filter = ActionsDAG::merge( - std::move(*actions_dag->clone()), + actions_dag.clone(), std::move(*src_table_query_info.prewhere_info->row_level_filter)); src_table_query_info.prewhere_info->row_level_filter->removeUnusedActions(); } - if (src_table_query_info.prewhere_info->prewhere_actions) { src_table_query_info.prewhere_info->prewhere_actions = ActionsDAG::merge( - std::move(*actions_dag->clone()), - std::move(*src_table_query_info.prewhere_info->prewhere_actions)); + actions_dag.clone(), + std::move(src_table_query_info.prewhere_info->prewhere_actions)); - src_table_query_info.prewhere_info->prewhere_actions->removeUnusedActions(); + src_table_query_info.prewhere_info->prewhere_actions.removeUnusedActions(); } } @@ -354,7 +353,7 @@ void StorageBuffer::read( header.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); - auto converting = std::make_unique(query_plan.getCurrentDataStream(), actions_dag); + auto converting = std::make_unique(query_plan.getCurrentDataStream(), std::move(actions_dag)); converting->setStepDescription("Convert destination table columns to Buffer table structure"); query_plan.addStep(std::move(converting)); @@ -429,21 +428,23 @@ void StorageBuffer::read( if (query_info.prewhere_info->row_level_filter) { + auto actions = std::make_shared(query_info.prewhere_info->row_level_filter->clone(), actions_settings); pipe_from_buffers.addSimpleTransform([&](const Block & header) { return std::make_shared( header, - std::make_shared(query_info.prewhere_info->row_level_filter, actions_settings), + actions, query_info.prewhere_info->row_level_column_name, false); }); } + auto actions = std::make_shared(query_info.prewhere_info->prewhere_actions.clone(), actions_settings); pipe_from_buffers.addSimpleTransform([&](const Block & header) { return std::make_shared( header, - std::make_shared(query_info.prewhere_info->prewhere_actions, actions_settings), + actions, query_info.prewhere_info->prewhere_column_name, query_info.prewhere_info->remove_prewhere_column); }); @@ -473,7 +474,7 @@ void StorageBuffer::read( result_header.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); - auto converting = std::make_unique(query_plan.getCurrentDataStream(), convert_actions_dag); + auto converting = std::make_unique(query_plan.getCurrentDataStream(), std::move(convert_actions_dag)); query_plan.addStep(std::move(converting)); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 65323b4bb52..3e38ddf830a 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -43,7 +43,6 @@ #include #include -#include #include #include #include @@ -61,26 +60,20 @@ #include #include #include -#include #include #include #include #include #include -#include #include #include #include #include #include #include -#include #include -#include #include -#include -#include #include #include @@ -90,7 +83,6 @@ #include #include #include -#include #include #include #include @@ -496,7 +488,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( } std::optional optimized_stage; - if (settings.allow_experimental_analyzer) + if (query_info.query_tree) optimized_stage = getOptimizedQueryProcessingStageAnalyzer(query_info, settings); else optimized_stage = getOptimizedQueryProcessingStage(query_info, settings); @@ -860,31 +852,28 @@ void StorageDistributed::read( modified_query_info.query = queryNodeToDistributedSelectQuery(query_tree_distributed); modified_query_info.query_tree = std::move(query_tree_distributed); + + /// Return directly (with correct header) if no shard to query. + if (modified_query_info.getCluster()->getShardsInfo().empty()) + return; } else { header = InterpreterSelectQuery(modified_query_info.query, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); - } - if (!settings.allow_experimental_analyzer) - { modified_query_info.query = ClusterProxy::rewriteSelectQuery( local_context, modified_query_info.query, remote_database, remote_table, remote_table_function_ptr); - } - /// Return directly (with correct header) if no shard to query. - if (modified_query_info.getCluster()->getShardsInfo().empty()) - { - if (settings.allow_experimental_analyzer) + if (modified_query_info.getCluster()->getShardsInfo().empty()) + { + Pipe pipe(std::make_shared(header)); + auto read_from_pipe = std::make_unique(std::move(pipe)); + read_from_pipe->setStepDescription("Read from NullSource (Distributed)"); + query_plan.addStep(std::move(read_from_pipe)); + return; - - Pipe pipe(std::make_shared(header)); - auto read_from_pipe = std::make_unique(std::move(pipe)); - read_from_pipe->setStepDescription("Read from NullSource (Distributed)"); - query_plan.addStep(std::move(read_from_pipe)); - - return; + } } const auto & snapshot_data = assert_cast(*storage_snapshot->data); @@ -1063,7 +1052,7 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu return pipeline; } -static ActionsDAGPtr getFilterFromQuery(const ASTPtr & ast, ContextPtr context) +static std::optional getFilterFromQuery(const ASTPtr & ast, ContextPtr context) { QueryPlan plan; SelectQueryOptions options; @@ -1107,9 +1096,9 @@ static ActionsDAGPtr getFilterFromQuery(const ASTPtr & ast, ContextPtr context) } if (!source) - return nullptr; + return {}; - return source->getFilterActionsDAG(); + return source->detachFilterActionsDAG(); } diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index 381c20c616d..0094723e3fd 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -77,7 +77,8 @@ StorageExecutable::StorageExecutable( const ExecutableSettings & settings_, const std::vector & input_queries_, const ColumnsDescription & columns, - const ConstraintsDescription & constraints) + const ConstraintsDescription & constraints, + const String & comment) : IStorage(table_id_) , settings(settings_) , input_queries(input_queries_) @@ -86,6 +87,7 @@ StorageExecutable::StorageExecutable( StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns); storage_metadata.setConstraints(constraints); + storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); ShellCommandSourceCoordinator::Configuration configuration @@ -237,7 +239,7 @@ void registerStorageExecutable(StorageFactory & factory) settings.loadFromQuery(*args.storage_def); auto global_context = args.getContext()->getGlobalContext(); - return std::make_shared(args.table_id, format, settings, input_queries, columns, constraints); + return std::make_shared(args.table_id, format, settings, input_queries, columns, constraints, args.comment); }; StorageFactory::StorageFeatures storage_features; @@ -255,4 +257,3 @@ void registerStorageExecutable(StorageFactory & factory) } } - diff --git a/src/Storages/StorageExecutable.h b/src/Storages/StorageExecutable.h index 2be2a84ab49..6748bb3223e 100644 --- a/src/Storages/StorageExecutable.h +++ b/src/Storages/StorageExecutable.h @@ -22,7 +22,8 @@ public: const ExecutableSettings & settings, const std::vector & input_queries, const ColumnsDescription & columns, - const ConstraintsDescription & constraints); + const ConstraintsDescription & constraints, + const String & comment); String getName() const override { diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 6c31a8a3a71..98cd5c4dfa9 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -427,7 +427,9 @@ namespace { for (const auto & path : paths) { - if (auto format_from_path = FormatFactory::instance().tryGetFormatFromFileName(path)) + auto format_from_path = FormatFactory::instance().tryGetFormatFromFileName(path); + /// Use this format only if we have a schema reader for it. + if (format_from_path && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_path)) { format = format_from_path; break; @@ -716,7 +718,9 @@ namespace /// If format is unknown we can try to determine it by the file name. if (!format) { - if (auto format_from_file = FormatFactory::instance().tryGetFormatFromFileName(*filename)) + auto format_from_file = FormatFactory::instance().tryGetFormatFromFileName(*filename); + /// Use this format only if we have a schema reader for it. + if (format_from_file && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_file)) format = format_from_file; } @@ -1130,12 +1134,16 @@ StorageFileSource::FilesIterator::FilesIterator( bool distributed_processing_) : WithContext(context_), files(files_), archive_info(std::move(archive_info_)), distributed_processing(distributed_processing_) { - ActionsDAGPtr filter_dag; + std::optional filter_dag; if (!distributed_processing && !archive_info && !files.empty()) filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); if (filter_dag) - VirtualColumnUtils::filterByPathOrFile(files, files, filter_dag, virtual_columns, context_); + { + VirtualColumnUtils::buildSetsForDAG(*filter_dag, context_); + auto actions = std::make_shared(std::move(*filter_dag)); + VirtualColumnUtils::filterByPathOrFile(files, files, actions, virtual_columns); + } } String StorageFileSource::FilesIterator::next() @@ -1244,7 +1252,7 @@ StorageFileSource::~StorageFileSource() beforeDestroy(); } -void StorageFileSource::setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) +void StorageFileSource::setKeyCondition(const std::optional & filter_actions_dag, ContextPtr context_) { setKeyConditionImpl(filter_actions_dag, context_, block_for_format); } diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index f955889185c..895a8a663b8 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -266,7 +266,7 @@ private: return storage->getName(); } - void setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) override; + void setKeyCondition(const std::optional & filter_actions_dag, ContextPtr context_) override; bool tryGetCountFromCache(const struct stat & file_stat); diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp deleted file mode 100644 index 6e8f425f8dc..00000000000 --- a/src/Storages/StorageFuzzQuery.cpp +++ /dev/null @@ -1,169 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - -ColumnPtr FuzzQuerySource::createColumn() -{ - auto column = ColumnString::create(); - ColumnString::Chars & data_to = column->getChars(); - ColumnString::Offsets & offsets_to = column->getOffsets(); - - offsets_to.resize(block_size); - IColumn::Offset offset = 0; - - auto fuzz_base = query; - size_t row_num = 0; - - while (row_num < block_size) - { - ASTPtr new_query = fuzz_base->clone(); - - auto base_before_fuzz = fuzz_base->formatForErrorMessage(); - fuzzer.fuzzMain(new_query); - auto fuzzed_text = new_query->formatForErrorMessage(); - - if (base_before_fuzz == fuzzed_text) - continue; - - /// AST is too long, will start from the original query. - if (config.max_query_length > 500) - { - fuzz_base = query; - continue; - } - - IColumn::Offset next_offset = offset + fuzzed_text.size() + 1; - data_to.resize(next_offset); - - std::copy(fuzzed_text.begin(), fuzzed_text.end(), &data_to[offset]); - - data_to[offset + fuzzed_text.size()] = 0; - offsets_to[row_num] = next_offset; - - offset = next_offset; - fuzz_base = new_query; - ++row_num; - } - - return column; -} - -StorageFuzzQuery::StorageFuzzQuery( - const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_) - : IStorage(table_id_), config(config_) -{ - StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(columns_); - storage_metadata.setComment(comment_); - setInMemoryMetadata(storage_metadata); -} - -Pipe StorageFuzzQuery::read( - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & /*query_info*/, - ContextPtr /*context*/, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - storage_snapshot->check(column_names); - - Pipes pipes; - pipes.reserve(num_streams); - - const ColumnsDescription & our_columns = storage_snapshot->metadata->getColumns(); - Block block_header; - for (const auto & name : column_names) - { - const auto & name_type = our_columns.get(name); - MutableColumnPtr column = name_type.type->createColumn(); - block_header.insert({std::move(column), name_type.type, name_type.name}); - } - - const char * begin = config.query.data(); - const char * end = begin + config.query.size(); - - ParserQuery parser(end, false); - auto query = parseQuery(parser, begin, end, "", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); - - for (UInt64 i = 0; i < num_streams; ++i) - pipes.emplace_back(std::make_shared(max_block_size, block_header, config, query)); - - return Pipe::unitePipes(std::move(pipes)); -} - -StorageFuzzQuery::Configuration StorageFuzzQuery::getConfiguration(ASTs & engine_args, ContextPtr local_context) -{ - StorageFuzzQuery::Configuration configuration{}; - - // Supported signatures: - // - // FuzzQuery(query) - // FuzzQuery(query, max_query_length) - // FuzzQuery(query, max_query_length, random_seed) - if (engine_args.empty() || engine_args.size() > 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 3 arguments: query, max_query_length, random_seed"); - - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - - auto first_arg = checkAndGetLiteralArgument(engine_args[0], "query"); - configuration.query = std::move(first_arg); - - if (engine_args.size() >= 2) - { - const auto & literal = engine_args[1]->as(); - if (!literal.value.isNull()) - configuration.max_query_length = checkAndGetLiteralArgument(literal, "max_query_length"); - } - - if (engine_args.size() == 3) - { - const auto & literal = engine_args[2]->as(); - if (!literal.value.isNull()) - configuration.random_seed = checkAndGetLiteralArgument(literal, "random_seed"); - } - - return configuration; -} - -void registerStorageFuzzQuery(StorageFactory & factory) -{ - factory.registerStorage( - "FuzzQuery", - [](const StorageFactory::Arguments & args) -> std::shared_ptr - { - ASTs & engine_args = args.engine_args; - - if (engine_args.empty()) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage FuzzQuery must have arguments."); - - StorageFuzzQuery::Configuration configuration = StorageFuzzQuery::getConfiguration(engine_args, args.getLocalContext()); - - for (const auto& col : args.columns) - if (col.type->getTypeId() != TypeIndex::String) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "'StorageFuzzQuery' supports only columns of String type, got {}.", col.type->getName()); - - return std::make_shared(args.table_id, args.columns, args.comment, configuration); - }); -} - -} diff --git a/src/Storages/StorageFuzzQuery.h b/src/Storages/StorageFuzzQuery.h deleted file mode 100644 index 125ef960e74..00000000000 --- a/src/Storages/StorageFuzzQuery.h +++ /dev/null @@ -1,88 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include "config.h" - -namespace DB -{ - -class NamedCollection; - -class StorageFuzzQuery final : public IStorage -{ -public: - struct Configuration : public StatelessTableEngineConfiguration - { - String query; - UInt64 max_query_length = 500; - UInt64 random_seed = randomSeed(); - }; - - StorageFuzzQuery( - const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_); - - std::string getName() const override { return "FuzzQuery"; } - - Pipe read( - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context, - QueryProcessingStage::Enum processed_stage, - size_t max_block_size, - size_t num_streams) override; - - static StorageFuzzQuery::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context); - -private: - const Configuration config; -}; - - -class FuzzQuerySource : public ISource -{ -public: - FuzzQuerySource( - UInt64 block_size_, Block block_header_, const StorageFuzzQuery::Configuration & config_, ASTPtr query_) - : ISource(block_header_) - , block_size(block_size_) - , block_header(std::move(block_header_)) - , config(config_) - , query(query_) - , fuzzer(config_.random_seed) - { - } - - String getName() const override { return "FuzzQuery"; } - -protected: - Chunk generate() override - { - Columns columns; - columns.reserve(block_header.columns()); - for (const auto & col : block_header) - { - chassert(col.type->getTypeId() == TypeIndex::String); - columns.emplace_back(createColumn()); - } - - return {std::move(columns), block_size}; - } - -private: - ColumnPtr createColumn(); - - UInt64 block_size; - Block block_header; - - StorageFuzzQuery::Configuration config; - ASTPtr query; - - QueryFuzzer fuzzer; -}; - -} diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 587cb621362..ef157239e26 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -37,6 +37,7 @@ #include #include +#include #include #include #include @@ -64,6 +65,11 @@ namespace DB { +namespace FailPoints +{ + extern const char keepermap_fail_drop_data[]; +} + namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; @@ -411,18 +417,16 @@ StorageKeeperMap::StorageKeeperMap( auto code = client->tryCreate(zk_table_path, "", zkutil::CreateMode::Persistent); - // tables_path was removed with drop - if (code == Coordination::Error::ZNONODE) - { - LOG_INFO(log, "Metadata nodes were removed by another server, will retry"); - continue; - } - else if (code != Coordination::Error::ZOK) - { - throw zkutil::KeeperException(code, "Failed to create table on path {} because a table with same UUID already exists", zk_root_path); - } + /// A table on the same Keeper path already exists, we just appended our table id to subscribe as a new replica + /// We still don't know if the table matches the expected metadata so table_is_valid is not changed + /// It will be checked lazily on the first operation + if (code == Coordination::Error::ZOK) + return; - return; + if (code != Coordination::Error::ZNONODE) + throw zkutil::KeeperException(code, "Failed to create table on path {} because a table with same UUID already exists", zk_root_path); + + /// ZNONODE means we dropped zk_tables_path but didn't finish drop completely } if (client->exists(zk_dropped_path)) @@ -473,6 +477,7 @@ StorageKeeperMap::StorageKeeperMap( table_is_valid = true; + /// we are the first table created for the specified Keeper path, i.e. we are the first replica return; } @@ -561,6 +566,10 @@ void StorageKeeperMap::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont bool StorageKeeperMap::dropTable(zkutil::ZooKeeperPtr zookeeper, const zkutil::EphemeralNodeHolder::Ptr & metadata_drop_lock) { + fiu_do_on(FailPoints::keepermap_fail_drop_data, + { + throw zkutil::KeeperException(Coordination::Error::ZOPERATIONTIMEOUT, "Manually triggered operation timeout"); + }); zookeeper->removeChildrenRecursive(zk_data_path); bool completely_removed = false; @@ -1280,6 +1289,7 @@ StoragePtr create(const StorageFactory::Arguments & args) StorageInMemoryMetadata metadata; metadata.setColumns(args.columns); metadata.setConstraints(args.constraints); + metadata.setComment(args.comment); if (!args.storage_def->primary_key) throw Exception(ErrorCodes::BAD_ARGUMENTS, "StorageKeeperMap requires one column in primary key"); diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 57d95a98f11..696136834d4 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -93,11 +93,6 @@ StorageMaterializedView::StorageMaterializedView( { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); - auto * storage_def = query.storage; - if (storage_def && storage_def->primary_key) - storage_metadata.primary_key = KeyDescription::getKeyFromAST(storage_def->primary_key->ptr(), - storage_metadata.columns, - local_context->getGlobalContext()); if (query.sql_security) storage_metadata.setSQLSecurity(query.sql_security->as()); @@ -110,12 +105,21 @@ StorageMaterializedView::StorageMaterializedView( throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); /// If the destination table is not set, use inner table - has_inner_table = query.to_table_id.empty(); - if (has_inner_table && !query.storage) + auto to_table_id = query.getTargetTableID(ViewTarget::To); + has_inner_table = to_table_id.empty(); + auto to_inner_uuid = query.getTargetInnerUUID(ViewTarget::To); + auto to_table_engine = query.getTargetInnerEngine(ViewTarget::To); + + if (has_inner_table && !to_table_engine) throw Exception(ErrorCodes::INCORRECT_QUERY, "You must specify where to save results of a MaterializedView query: " "either ENGINE or an existing table in a TO clause"); + if (to_table_engine && to_table_engine->primary_key) + storage_metadata.primary_key = KeyDescription::getKeyFromAST(to_table_engine->primary_key->ptr(), + storage_metadata.columns, + local_context->getGlobalContext()); + auto select = SelectQueryDescription::getSelectQueryFromASTForMatView(query.select->clone(), query.refresh_strategy != nullptr, local_context); if (select.select_table_id) { @@ -135,25 +139,25 @@ StorageMaterializedView::StorageMaterializedView( setInMemoryMetadata(storage_metadata); - bool point_to_itself_by_uuid = has_inner_table && query.to_inner_uuid != UUIDHelpers::Nil - && query.to_inner_uuid == table_id_.uuid; - bool point_to_itself_by_name = !has_inner_table && query.to_table_id.database_name == table_id_.database_name - && query.to_table_id.table_name == table_id_.table_name; + bool point_to_itself_by_uuid = has_inner_table && to_inner_uuid != UUIDHelpers::Nil + && to_inner_uuid == table_id_.uuid; + bool point_to_itself_by_name = !has_inner_table && to_table_id.database_name == table_id_.database_name + && to_table_id.table_name == table_id_.table_name; if (point_to_itself_by_uuid || point_to_itself_by_name) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Materialized view {} cannot point to itself", table_id_.getFullTableName()); if (!has_inner_table) { - target_table_id = query.to_table_id; + target_table_id = to_table_id; } else if (LoadingStrictnessLevel::ATTACH <= mode) { /// If there is an ATTACH request, then the internal table must already be created. - target_table_id = StorageID(getStorageID().database_name, generateInnerTableName(getStorageID()), query.to_inner_uuid); + target_table_id = StorageID(getStorageID().database_name, generateInnerTableName(getStorageID()), to_inner_uuid); } else { - const String & engine = query.storage->engine->name; + const String & engine = to_table_engine->engine->name; const auto & storage_features = StorageFactory::instance().getStorageFeatures(engine); /// We will create a query to create an internal table. @@ -161,8 +165,8 @@ StorageMaterializedView::StorageMaterializedView( auto manual_create_query = std::make_shared(); manual_create_query->setDatabase(getStorageID().database_name); manual_create_query->setTable(generateInnerTableName(getStorageID())); - manual_create_query->uuid = query.to_inner_uuid; - manual_create_query->has_uuid = query.to_inner_uuid != UUIDHelpers::Nil; + manual_create_query->uuid = to_inner_uuid; + manual_create_query->has_uuid = to_inner_uuid != UUIDHelpers::Nil; auto new_columns_list = std::make_shared(); new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); @@ -184,7 +188,9 @@ StorageMaterializedView::StorageMaterializedView( } manual_create_query->set(manual_create_query->columns_list, new_columns_list); - manual_create_query->set(manual_create_query->storage, query.storage->ptr()); + + if (to_table_engine) + manual_create_query->set(manual_create_query->storage, to_table_engine); InterpreterCreateQuery create_interpreter(manual_create_query, create_context); create_interpreter.setInternal(true); @@ -275,8 +281,8 @@ void StorageMaterializedView::read( * They may be added in case of distributed query with JOIN. * In that case underlying table returns joined columns as well. */ - converting_actions->removeUnusedActions(); - auto converting_step = std::make_unique(query_plan.getCurrentDataStream(), converting_actions); + converting_actions.removeUnusedActions(); + auto converting_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(converting_actions)); converting_step->setStepDescription("Convert target table structure to MaterializedView structure"); query_plan.addStep(std::move(converting_step)); } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index f5bc183931f..7c268d36a7b 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -369,6 +369,14 @@ void StorageMerge::read( /// What will be result structure depending on query processed stage in source tables? Block common_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, local_context, processed_stage); + if (local_context->getSettingsRef().allow_experimental_analyzer && processed_stage == QueryProcessingStage::Complete) + { + /// Remove constants. + /// For StorageDistributed some functions like `hostName` that are constants only for local queries. + for (auto & column : common_header) + column.column = column.column->convertToFullColumnIfConst(); + } + auto step = std::make_unique( column_names, query_info, @@ -408,7 +416,7 @@ void ReadFromMerge::addFilter(FilterDAGInfo filter) { output_stream->header = FilterTransform::transformHeader( output_stream->header, - filter.actions.get(), + &filter.actions, filter.column_name, filter.do_remove_column); pushed_down_filters.push_back(std::move(filter)); @@ -629,7 +637,7 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ auto alias_actions = ExpressionAnalyzer(required_columns_expr_list, syntax_result, context).getActionsDAG(true); - column_names_as_aliases = alias_actions->getRequiredColumns().getNames(); + column_names_as_aliases = alias_actions.getRequiredColumns().getNames(); if (column_names_as_aliases.empty()) column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical()).name); } @@ -663,7 +671,7 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ { auto filter_step = std::make_unique( child.plan.getCurrentDataStream(), - filter_info.actions->clone(), + filter_info.actions.clone(), filter_info.column_name, filter_info.do_remove_column); @@ -1061,7 +1069,7 @@ void ReadFromMerge::addVirtualColumns( column.column = column.type->createColumnConst(0, Field(database_name)); auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), adding_column_dag); + auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), std::move(adding_column_dag)); child.plan.addStep(std::move(expression_step)); plan_header = child.plan.getCurrentDataStream().header; } @@ -1075,7 +1083,7 @@ void ReadFromMerge::addVirtualColumns( column.column = column.type->createColumnConst(0, Field(table_name)); auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), adding_column_dag); + auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), std::move(adding_column_dag)); child.plan.addStep(std::move(expression_step)); plan_header = child.plan.getCurrentDataStream().header; } @@ -1090,7 +1098,7 @@ void ReadFromMerge::addVirtualColumns( column.column = column.type->createColumnConst(0, Field(database_name)); auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), adding_column_dag); + auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), std::move(adding_column_dag)); child.plan.addStep(std::move(expression_step)); plan_header = child.plan.getCurrentDataStream().header; } @@ -1103,7 +1111,7 @@ void ReadFromMerge::addVirtualColumns( column.column = column.type->createColumnConst(0, Field(table_name)); auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), adding_column_dag); + auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), std::move(adding_column_dag)); child.plan.addStep(std::move(expression_step)); plan_header = child.plan.getCurrentDataStream().header; } @@ -1241,7 +1249,7 @@ ReadFromMerge::RowPolicyData::RowPolicyData(RowPolicyFilterPtr row_policy_filter auto expression_analyzer = ExpressionAnalyzer{expr, syntax_result, local_context}; actions_dag = expression_analyzer.getActionsDAG(false /* add_aliases */, false /* project_result */); - filter_actions = std::make_shared(actions_dag, + filter_actions = std::make_shared(actions_dag.clone(), ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); const auto & required_columns = filter_actions->getRequiredColumnsWithTypes(); const auto & sample_block_columns = filter_actions->getSampleBlock().getNamesAndTypesList(); @@ -1279,12 +1287,12 @@ void ReadFromMerge::RowPolicyData::extendNames(Names & names) const void ReadFromMerge::RowPolicyData::addStorageFilter(SourceStepWithFilter * step) const { - step->addFilter(actions_dag, filter_column_name); + step->addFilter(actions_dag.clone(), filter_column_name); } void ReadFromMerge::RowPolicyData::addFilterTransform(QueryPlan & plan) const { - auto filter_step = std::make_unique(plan.getCurrentDataStream(), actions_dag, filter_column_name, true /* remove filter column */); + auto filter_step = std::make_unique(plan.getCurrentDataStream(), actions_dag.clone(), filter_column_name, true /* remove filter column */); plan.addStep(std::move(filter_step)); } @@ -1477,7 +1485,7 @@ void ReadFromMerge::convertAndFilterSourceStream( { pipe_columns.emplace_back(NameAndTypePair(alias.name, alias.type)); - auto actions_dag = std::make_shared(pipe_columns); + ActionsDAG actions_dag(pipe_columns); QueryTreeNodePtr query_tree = buildQueryTree(alias.expression, local_context); query_tree->setAlias(alias.name); @@ -1486,13 +1494,13 @@ void ReadFromMerge::convertAndFilterSourceStream( query_analysis_pass.run(query_tree, local_context); PlannerActionsVisitor actions_visitor(modified_query_info.planner_context, false /*use_column_identifier_as_action_node_name*/); - const auto & nodes = actions_visitor.visit(*actions_dag, query_tree); + const auto & nodes = actions_visitor.visit(actions_dag, query_tree); if (nodes.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected to have 1 output but got {}", nodes.size()); - actions_dag->addOrReplaceInOutputs(actions_dag->addAlias(*nodes.front(), alias.name)); - auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), actions_dag); + actions_dag.addOrReplaceInOutputs(actions_dag.addAlias(*nodes.front(), alias.name)); + auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), std::move(actions_dag)); child.plan.addStep(std::move(expression_step)); } } @@ -1507,7 +1515,7 @@ void ReadFromMerge::convertAndFilterSourceStream( auto dag = std::make_shared(pipe_columns); auto actions_dag = expression_analyzer.getActionsDAG(true, false); - auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), actions_dag); + auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), std::move(actions_dag)); child.plan.addStep(std::move(expression_step)); } } @@ -1525,7 +1533,7 @@ void ReadFromMerge::convertAndFilterSourceStream( header.getColumnsWithTypeAndName(), convert_actions_match_columns_mode); - auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), convert_actions_dag); + auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), std::move(convert_actions_dag)); child.plan.addStep(std::move(expression_step)); } @@ -1566,7 +1574,7 @@ bool ReadFromMerge::requestReadingInOrder(InputOrderInfoPtr order_info_) void ReadFromMerge::applyFilters(ActionDAGNodes added_filter_nodes) { for (const auto & filter_info : pushed_down_filters) - added_filter_nodes.nodes.push_back(&filter_info.actions->findInOutputs(filter_info.column_name)); + added_filter_nodes.nodes.push_back(&filter_info.actions.findInOutputs(filter_info.column_name)); SourceStepWithFilter::applyFilters(added_filter_nodes); diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 94b34256d02..d6f2deca7fd 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -225,7 +225,7 @@ private: private: std::string filter_column_name; // complex filter, may contain logic operations - ActionsDAGPtr actions_dag; + ActionsDAG actions_dag; ExpressionActionsPtr filter_actions; StorageMetadataPtr storage_metadata_snapshot; }; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 444b3fbae4c..f55f672fe5e 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -263,7 +263,7 @@ std::optional StorageMergeTree::totalRows(const Settings &) const return getTotalActiveSizeInRows(); } -std::optional StorageMergeTree::totalRowsByPartitionPredicate(const ActionsDAGPtr & filter_actions_dag, ContextPtr local_context) const +std::optional StorageMergeTree::totalRowsByPartitionPredicate(const ActionsDAG & filter_actions_dag, ContextPtr local_context) const { auto parts = getVisibleDataPartsVector(local_context); return totalRowsByPartitionPredicateImpl(filter_actions_dag, local_context, parts); @@ -505,18 +505,18 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context additional_info = fmt::format(" (TID: {}; TIDH: {})", current_tid, current_tid.getHash()); } - Int64 version; + MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get(), current_tid, getContext()->getWriteSettings()); + Int64 version = increment.get(); + entry.commit(version); + String mutation_id = entry.file_name; + if (txn) + txn->addMutation(shared_from_this(), mutation_id); + + bool alter_conversions_mutations_updated = updateAlterConversionsMutations(entry.commands, alter_conversions_mutations, /* remove= */ false); + { std::lock_guard lock(currently_processing_in_background_mutex); - MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get(), current_tid, getContext()->getWriteSettings()); - version = increment.get(); - entry.commit(version); - String mutation_id = entry.file_name; - if (txn) - txn->addMutation(shared_from_this(), mutation_id); - - bool alter_conversions_mutations_updated = updateAlterConversionsMutations(entry.commands, alter_conversions_mutations, /* remove= */ false); bool inserted = current_mutations_by_version.try_emplace(version, std::move(entry)).second; if (!inserted) { @@ -527,9 +527,9 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context } throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutation {} already exists, it's a bug", version); } - - LOG_INFO(log, "Added mutation: {}{}", mutation_id, additional_info); } + + LOG_INFO(log, "Added mutation: {}{}", mutation_id, additional_info); background_operations_assignee.trigger(); return version; } diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 064b51739bd..6223b5f50fa 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -65,7 +65,7 @@ public: size_t num_streams) override; std::optional totalRows(const Settings &) const override; - std::optional totalRowsByPartitionPredicate(const ActionsDAGPtr & filter_actions_dag, ContextPtr) const override; + std::optional totalRowsByPartitionPredicate(const ActionsDAG & filter_actions_dag, ContextPtr) const override; std::optional totalBytes(const Settings &) const override; std::optional totalBytesUncompressed(const Settings &) const override; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 72f725965e0..2d826c6c2df 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1516,7 +1516,7 @@ static time_t tryGetPartCreateTime(zkutil::ZooKeeperPtr & zookeeper, const Strin void StorageReplicatedMergeTree::paranoidCheckForCoveredPartsInZooKeeperOnStart(const Strings & parts_in_zk, const Strings & parts_to_fetch) const { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD constexpr bool paranoid_check_for_covered_parts_default = true; #else constexpr bool paranoid_check_for_covered_parts_default = false; @@ -2383,7 +2383,7 @@ static void paranoidCheckForCoveredPartsInZooKeeper( const String & covering_part_name, const StorageReplicatedMergeTree & storage) { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD constexpr bool paranoid_check_for_covered_parts_default = true; #else constexpr bool paranoid_check_for_covered_parts_default = false; @@ -3940,7 +3940,7 @@ void StorageReplicatedMergeTree::mergeSelectingTask() merge_selecting_task->schedule(); else { - LOG_TRACE(log, "Scheduling next merge selecting task after {}ms", merge_selecting_sleep_ms); + LOG_TRACE(log, "Scheduling next merge selecting task after {}ms, current attempt status: {}", merge_selecting_sleep_ms, result); merge_selecting_task->scheduleAfter(merge_selecting_sleep_ms); } } @@ -5604,7 +5604,7 @@ std::optional StorageReplicatedMergeTree::totalRows(const Settings & set return res; } -std::optional StorageReplicatedMergeTree::totalRowsByPartitionPredicate(const ActionsDAGPtr & filter_actions_dag, ContextPtr local_context) const +std::optional StorageReplicatedMergeTree::totalRowsByPartitionPredicate(const ActionsDAG & filter_actions_dag, ContextPtr local_context) const { DataPartsVector parts; foreachActiveParts([&](auto & part) { parts.push_back(part); }, local_context->getSettingsRef().select_sequential_consistency); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index f96206ce657..2e54f17d5d5 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -159,7 +159,7 @@ public: size_t num_streams) override; std::optional totalRows(const Settings & settings) const override; - std::optional totalRowsByPartitionPredicate(const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const override; + std::optional totalRowsByPartitionPredicate(const ActionsDAG & filter_actions_dag, ContextPtr context) const override; std::optional totalBytes(const Settings & settings) const override; std::optional totalBytesUncompressed(const Settings & settings) const override; diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index 85417a2f2a4..b90b15f3b99 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -50,6 +50,7 @@ StorageSQLite::StorageSQLite( const String & remote_table_name_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, + const String & comment, ContextPtr context_) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) @@ -71,6 +72,7 @@ StorageSQLite::StorageSQLite( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); + storage_metadata.setComment(comment); } @@ -211,7 +213,7 @@ void registerStorageSQLite(StorageFactory & factory) auto sqlite_db = openSQLiteDB(database_path, args.getContext(), /* throw_on_error */ args.mode <= LoadingStrictnessLevel::CREATE); return std::make_shared(args.table_id, sqlite_db, database_path, - table_name, args.columns, args.constraints, args.getContext()); + table_name, args.columns, args.constraints, args.comment, args.getContext()); }, { .supports_schema_inference = true, diff --git a/src/Storages/StorageSQLite.h b/src/Storages/StorageSQLite.h index ed673123fe0..97638ac04cb 100644 --- a/src/Storages/StorageSQLite.h +++ b/src/Storages/StorageSQLite.h @@ -27,6 +27,7 @@ public: const String & remote_table_name_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, + const String & comment, ContextPtr context_); std::string getName() const override { return "SQLite"; } diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index 9507eb6ed8a..345dd62c687 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -112,7 +112,7 @@ public: auto step = std::make_unique( query_plan.getCurrentDataStream(), - convert_actions_dag); + std::move(convert_actions_dag)); step->setStepDescription("Converting columns"); query_plan.addStep(std::move(step)); diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index fd7c3976803..318ec7f367d 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -198,7 +198,7 @@ public: { uris = parseRemoteDescription(uri_, 0, uri_.size(), ',', max_addresses); - ActionsDAGPtr filter_dag; + std::optional filter_dag; if (!uris.empty()) filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); @@ -209,7 +209,9 @@ public: for (const auto & uri : uris) paths.push_back(Poco::URI(uri).getPath()); - VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, context); + VirtualColumnUtils::buildSetsForDAG(*filter_dag, context); + auto actions = std::make_shared(std::move(*filter_dag)); + VirtualColumnUtils::filterByPathOrFile(uris, paths, actions, virtual_columns); } } @@ -737,7 +739,9 @@ namespace { for (const auto & url : options) { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(url)) + auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(url); + /// Use this format only if we have a schema reader for it. + if (format_from_file_name && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_file_name)) { format = format_from_file_name; break; diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 1804079e75f..63d01a02417 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -185,7 +185,7 @@ public: String getName() const override { return name; } - void setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) override + void setKeyCondition(const std::optional & filter_actions_dag, ContextPtr context_) override { setKeyConditionImpl(filter_actions_dag, context_, block_for_format); } diff --git a/src/Storages/StorageValues.cpp b/src/Storages/StorageValues.cpp index 894b1404a21..c1ca6244866 100644 --- a/src/Storages/StorageValues.cpp +++ b/src/Storages/StorageValues.cpp @@ -48,14 +48,14 @@ Pipe StorageValues::read( if (!prepared_pipe.empty()) { - auto dag = std::make_shared(prepared_pipe.getHeader().getColumnsWithTypeAndName()); + ActionsDAG dag(prepared_pipe.getHeader().getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs outputs; outputs.reserve(column_names.size()); for (const auto & name : column_names) - outputs.push_back(dag->getOutputs()[prepared_pipe.getHeader().getPositionByName(name)]); + outputs.push_back(dag.getOutputs()[prepared_pipe.getHeader().getPositionByName(name)]); - dag->getOutputs().swap(outputs); - auto expression = std::make_shared(dag); + dag.getOutputs().swap(outputs); + auto expression = std::make_shared(std::move(dag)); prepared_pipe.addSimpleTransform([&](const Block & header) { diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 5f768bce978..581c2c978c4 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -179,8 +179,8 @@ void StorageView::read( /// It's expected that the columns read from storage are not constant. /// Because method 'getSampleBlockForColumns' is used to obtain a structure of result in InterpreterSelectQuery. - auto materializing_actions = std::make_shared(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); - materializing_actions->addMaterializingOutputActions(); + ActionsDAG materializing_actions(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + materializing_actions.addMaterializingOutputActions(); auto materializing = std::make_unique(query_plan.getCurrentDataStream(), std::move(materializing_actions)); materializing->setStepDescription("Materialize constants after VIEW subquery"); @@ -205,7 +205,7 @@ void StorageView::read( expected_header.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); - auto converting = std::make_unique(query_plan.getCurrentDataStream(), convert_actions_dag); + auto converting = std::make_unique(query_plan.getCurrentDataStream(), std::move(convert_actions_dag)); converting->setStepDescription("Convert VIEW subquery result to VIEW table structure"); query_plan.addStep(std::move(converting)); } diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index 9201eef185f..35b9c0008c6 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -1,5 +1,6 @@ // autogenerated by tests/ci/version_helper.py const char * auto_contributors[] { + "0x01f", "0xflotus", "13DaGGeR", "1lann", @@ -167,6 +168,7 @@ const char * auto_contributors[] { "AnneClickHouse", "Anselmo D. Adams", "Anthony N. Simon", + "AntiTopQuark", "Anton Ivashkin", "Anton Kobzev", "Anton Kozlov", @@ -299,6 +301,7 @@ const char * auto_contributors[] { "Dan Wu", "DanRoscigno", "Dani Pozo", + "Daniel Anugerah", "Daniel Bershatsky", "Daniel Byta", "Daniel Dao", @@ -370,6 +373,7 @@ const char * auto_contributors[] { "Elena", "Elena Baskakova", "Elena Torró", + "Elena Torró Martínez", "Elghazal Ahmed", "Eliot Hautefeuille", "Elizaveta Mironyuk", @@ -415,6 +419,7 @@ const char * auto_contributors[] { "FgoDt", "Filatenkov Artur", "Filipe Caixeta", + "Filipp Bakanov", "Filipp Ozinov", "Filippov Denis", "Fille", @@ -451,6 +456,7 @@ const char * auto_contributors[] { "Gleb Novikov", "Gleb-Tretyakov", "GoGoWen2021", + "Gosha Letov", "Gregory", "Grigorii Sokolik", "Grigory", @@ -461,6 +467,7 @@ const char * auto_contributors[] { "Guillaume Tassery", "Guo Wangyang", "Guo Wei (William)", + "Guspan Tanadi", "Haavard Kvaalen", "Habibullah Oladepo", "HaiBo Li", @@ -474,6 +481,7 @@ const char * auto_contributors[] { "HarryLeeIBM", "Hasitha Kanchana", "Hasnat", + "Haydn", "Heena Bansal", "HeenaBansal2009", "Hendrik M", @@ -606,6 +614,7 @@ const char * auto_contributors[] { "Kevin Chiang", "Kevin Michel", "Kevin Mingtarja", + "Kevin Song", "Kevin Zhang", "KevinyhZou", "KinderRiven", @@ -661,6 +670,7 @@ const char * auto_contributors[] { "Lewinma", "Li Shuai", "Li Yin", + "Linh Giang", "Lino Uruñuela", "Lirikl", "Liu Cong", @@ -690,6 +700,7 @@ const char * auto_contributors[] { "Maksim Alekseev", "Maksim Buren", "Maksim Fedotov", + "Maksim Galkin", "Maksim Kita", "Maksym Sobolyev", "Mal Curtis", @@ -724,6 +735,7 @@ const char * auto_contributors[] { "Max Akhmedov", "Max Bruce", "Max K", + "Max K.", "Max Kainov", "Max Vetrov", "MaxTheHuman", @@ -811,6 +823,7 @@ const char * auto_contributors[] { "Nataly Merezhuk", "Natalya Chizhonkova", "Natasha Murashkina", + "Nathan Clevenger", "NeZeD [Mac Pro]", "Neeke Gao", "Neng Liu", @@ -946,6 +959,7 @@ const char * auto_contributors[] { "Robert Coelho", "Robert Hodges", "Robert Schulze", + "Rodolphe Dugé de Bernonville", "RogerYK", "Rohit Agarwal", "Romain Neutron", @@ -1107,6 +1121,7 @@ const char * auto_contributors[] { "Timur Solodovnikov", "TiunovNN", "Tobias Adamson", + "Tobias Florek", "Tobias Lins", "Tom Bombadil", "Tom Risse", @@ -1231,11 +1246,13 @@ const char * auto_contributors[] { "Yingchun Lai", "Yingfan Chen", "Yinzheng-Sun", + "Yinzuo Jiang", "Yiğit Konur", "Yohann Jardin", "Yong Wang", "Yong-Hao Zou", "Youenn Lebras", + "Your Name", "Yu, Peng", "Yuko Takagi", "Yuntao Wu", @@ -1250,6 +1267,7 @@ const char * auto_contributors[] { "Yury Stankevich", "Yusuke Tanaka", "Zach Naimon", + "Zawa-II", "Zheng Miao", "ZhiHong Zhang", "ZhiYong Wang", @@ -1380,6 +1398,7 @@ const char * auto_contributors[] { "conicliu", "copperybean", "coraxster", + "cw5121", "cwkyaoyao", "d.v.semenov", "dalei2019", @@ -1460,12 +1479,14 @@ const char * auto_contributors[] { "fuzzERot", "fyu", "g-arslan", + "gabrielmcg44", "ggerogery", "giordyb", "glockbender", "glushkovds", "grantovsky", "gulige", + "gun9nir", "guoleiyi", "guomaolin", "guov100", @@ -1527,6 +1548,7 @@ const char * auto_contributors[] { "jferroal", "jiahui-97", "jianmei zhang", + "jiaosenvip", "jinjunzh", "jiyoungyoooo", "jktng", @@ -1541,6 +1563,7 @@ const char * auto_contributors[] { "jun won", "jus1096", "justindeguzman", + "jwoodhead", "jyz0309", "karnevil13", "kashwy", @@ -1633,10 +1656,12 @@ const char * auto_contributors[] { "mateng0915", "mateng915", "mauidude", + "max-vostrikov", "maxim", "maxim-babenko", "maxkuzn", "maxulan", + "maxvostrikov", "mayamika", "mehanizm", "melin", @@ -1677,6 +1702,7 @@ const char * auto_contributors[] { "nathanbegbie", "nauta", "nautaa", + "nauu", "ndchikin", "nellicus", "nemonlou", @@ -1975,6 +2001,7 @@ const char * auto_contributors[] { "张健", "张风啸", "徐炘", + "忒休斯~Theseus", "曲正鹏", "木木夕120", "未来星___费", diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 7e4c1de1c65..0d0ae666c10 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -307,7 +307,7 @@ protected: std::shared_ptr storage; std::vector columns_mask; - ActionsDAGPtr filter; + std::optional filter; const size_t max_block_size; const size_t num_streams; }; @@ -359,7 +359,7 @@ void StorageSystemDetachedParts::read( void ReadFromSystemDetachedParts::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { - auto state = std::make_shared(StoragesInfoStream(nullptr, filter, context)); + auto state = std::make_shared(StoragesInfoStream({}, std::move(filter), context)); Pipe pipe; diff --git a/src/Storages/System/StorageSystemDroppedTablesParts.cpp b/src/Storages/System/StorageSystemDroppedTablesParts.cpp index c17d6402d88..c2601b8ebe3 100644 --- a/src/Storages/System/StorageSystemDroppedTablesParts.cpp +++ b/src/Storages/System/StorageSystemDroppedTablesParts.cpp @@ -11,7 +11,7 @@ namespace DB { -StoragesDroppedInfoStream::StoragesDroppedInfoStream(const ActionsDAGPtr & filter, ContextPtr context) +StoragesDroppedInfoStream::StoragesDroppedInfoStream(std::optional filter, ContextPtr context) : StoragesInfoStreamBase(context) { /// Will apply WHERE to subset of columns and then add more columns. @@ -75,7 +75,7 @@ StoragesDroppedInfoStream::StoragesDroppedInfoStream(const ActionsDAGPtr & filte { /// Filter block_to_filter with columns 'database', 'table', 'engine', 'active'. if (filter) - VirtualColumnUtils::filterBlockWithDAG(filter, block_to_filter, context); + VirtualColumnUtils::filterBlockWithExpression(VirtualColumnUtils::buildFilterExpression(std::move(*filter), context), block_to_filter); rows = block_to_filter.rows(); } diff --git a/src/Storages/System/StorageSystemDroppedTablesParts.h b/src/Storages/System/StorageSystemDroppedTablesParts.h index dff9e41cce3..32468fc31b2 100644 --- a/src/Storages/System/StorageSystemDroppedTablesParts.h +++ b/src/Storages/System/StorageSystemDroppedTablesParts.h @@ -9,7 +9,7 @@ namespace DB class StoragesDroppedInfoStream : public StoragesInfoStreamBase { public: - StoragesDroppedInfoStream(const ActionsDAGPtr & filter, ContextPtr context); + StoragesDroppedInfoStream(std::optional filter, ContextPtr context); protected: bool tryLockTable(StoragesInfo &) override { @@ -30,9 +30,9 @@ public: std::string getName() const override { return "SystemDroppedTablesParts"; } protected: - std::unique_ptr getStoragesInfoStream(const ActionsDAGPtr &, const ActionsDAGPtr & filter, ContextPtr context) override + std::unique_ptr getStoragesInfoStream(std::optional, std::optional filter, ContextPtr context) override { - return std::make_unique(filter, context); + return std::make_unique(std::move(filter), context); } }; diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index f7d1c1b3eb8..7ace8ee24aa 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -91,7 +91,7 @@ StoragesInfo::getProjectionParts(MergeTreeData::DataPartStateVector & state, boo return data->getProjectionPartsVectorForInternalUsage({State::Active}, &state); } -StoragesInfoStream::StoragesInfoStream(const ActionsDAGPtr & filter_by_database, const ActionsDAGPtr & filter_by_other_columns, ContextPtr context) +StoragesInfoStream::StoragesInfoStream(std::optional filter_by_database, std::optional filter_by_other_columns, ContextPtr context) : StoragesInfoStreamBase(context) { /// Will apply WHERE to subset of columns and then add more columns. @@ -124,7 +124,7 @@ StoragesInfoStream::StoragesInfoStream(const ActionsDAGPtr & filter_by_database, /// Filter block_to_filter with column 'database'. if (filter_by_database) - VirtualColumnUtils::filterBlockWithDAG(filter_by_database, block_to_filter, context); + VirtualColumnUtils::filterBlockWithExpression(VirtualColumnUtils::buildFilterExpression(std::move(*filter_by_database), context), block_to_filter); rows = block_to_filter.rows(); /// Block contains new columns, update database_column. @@ -204,7 +204,7 @@ StoragesInfoStream::StoragesInfoStream(const ActionsDAGPtr & filter_by_database, { /// Filter block_to_filter with columns 'database', 'table', 'engine', 'active'. if (filter_by_other_columns) - VirtualColumnUtils::filterBlockWithDAG(filter_by_other_columns, block_to_filter, context); + VirtualColumnUtils::filterBlockWithExpression(VirtualColumnUtils::buildFilterExpression(std::move(*filter_by_other_columns), context), block_to_filter); rows = block_to_filter.rows(); } @@ -236,8 +236,8 @@ protected: std::shared_ptr storage; std::vector columns_mask; const bool has_state_column; - ActionsDAGPtr filter_by_database; - ActionsDAGPtr filter_by_other_columns; + std::optional filter_by_database; + std::optional filter_by_other_columns; }; ReadFromSystemPartsBase::ReadFromSystemPartsBase( @@ -318,7 +318,7 @@ void StorageSystemPartsBase::read( void ReadFromSystemPartsBase::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { - auto stream = storage->getStoragesInfoStream(filter_by_database, filter_by_other_columns, context); + auto stream = storage->getStoragesInfoStream(std::move(filter_by_database), std::move(filter_by_other_columns), context); auto header = getOutputStream().header; MutableColumns res_columns = header.cloneEmptyColumns(); diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index 8671fd850f8..806af4a7bf8 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -116,7 +116,7 @@ protected: class StoragesInfoStream : public StoragesInfoStreamBase { public: - StoragesInfoStream(const ActionsDAGPtr & filter_by_database, const ActionsDAGPtr & filter_by_other_columns, ContextPtr context); + StoragesInfoStream(std::optional filter_by_database, std::optional filter_by_other_columns, ContextPtr context); }; /** Implements system table 'parts' which allows to get information about data parts for tables of MergeTree family. @@ -146,9 +146,9 @@ protected: StorageSystemPartsBase(const StorageID & table_id_, ColumnsDescription && columns); - virtual std::unique_ptr getStoragesInfoStream(const ActionsDAGPtr & filter_by_database, const ActionsDAGPtr & filter_by_other_columns, ContextPtr context) + virtual std::unique_ptr getStoragesInfoStream(std::optional filter_by_database, std::optional filter_by_other_columns, ContextPtr context) { - return std::make_unique(filter_by_database, filter_by_other_columns, context); + return std::make_unique(std::move(filter_by_database), std::move(filter_by_other_columns), context); } virtual void diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index cc221bbea69..6cc525b9340 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -277,7 +277,7 @@ public: StackTraceSource( const Names & column_names, Block header_, - ActionsDAGPtr && filter_dag_, + std::optional filter_dag_, ContextPtr context_, UInt64 max_block_size_, LoggerPtr log_) @@ -423,7 +423,7 @@ protected: private: ContextPtr context; Block header; - const ActionsDAGPtr filter_dag; + const std::optional filter_dag; const ActionsDAG::Node * predicate; const size_t max_block_size; diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 43b761d84b1..943ce9c317a 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -125,7 +125,7 @@ ColumnPtr getFilteredTables( block.insert(ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine")); if (dag) - VirtualColumnUtils::filterBlockWithDAG(dag, block, context); + VirtualColumnUtils::filterBlockWithExpression(VirtualColumnUtils::buildFilterExpression(std::move(*dag), context), block); return block.getByPosition(0).column; } @@ -484,7 +484,8 @@ protected: if (ast_create && !context->getSettingsRef().show_table_uuid_in_table_create_query_if_not_nil) { ast_create->uuid = UUIDHelpers::Nil; - ast_create->to_inner_uuid = UUIDHelpers::Nil; + if (ast_create->targets) + ast_create->targets->resetInnerUUIDs(); } if (columns_mask[src_index++]) diff --git a/src/Storages/System/StorageSystemZooKeeperConnection.cpp b/src/Storages/System/StorageSystemZooKeeperConnection.cpp index ec29b84dac3..72a7ba38429 100644 --- a/src/Storages/System/StorageSystemZooKeeperConnection.cpp +++ b/src/Storages/System/StorageSystemZooKeeperConnection.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -27,7 +28,7 @@ ColumnsDescription StorageSystemZooKeeperConnection::getColumnsDescription() /* 0 */ {"name", std::make_shared(), "ZooKeeper cluster's name."}, /* 1 */ {"host", std::make_shared(), "The hostname/IP of the ZooKeeper node that ClickHouse connected to."}, /* 2 */ {"port", std::make_shared(), "The port of the ZooKeeper node that ClickHouse connected to."}, - /* 3 */ {"index", std::make_shared(), "The index of the ZooKeeper node that ClickHouse connected to. The index is from ZooKeeper config."}, + /* 3 */ {"index", std::make_shared(std::make_shared()), "The index of the ZooKeeper node that ClickHouse connected to. The index is from ZooKeeper config. If not connected, this column is NULL."}, /* 4 */ {"connected_time", std::make_shared(), "When the connection was established."}, /* 5 */ {"session_uptime_elapsed_seconds", std::make_shared(), "Seconds elapsed since the connection was established."}, /* 6 */ {"is_expired", std::make_shared(), "Is the current connection expired."}, @@ -64,7 +65,7 @@ void StorageSystemZooKeeperConnection::fillData(MutableColumns & res_columns, Co /// For read-only snapshot type functionality, it's acceptable even though 'getZooKeeper' may cause data inconsistency. auto fill_data = [&](const String & name, const zkutil::ZooKeeperPtr zookeeper, MutableColumns & columns) { - Int8 index = zookeeper->getConnectedHostIdx(); + auto index = zookeeper->getConnectedHostIdx(); String host_port = zookeeper->getConnectedHostPort(); if (index != -1 && !host_port.empty()) { @@ -78,7 +79,10 @@ void StorageSystemZooKeeperConnection::fillData(MutableColumns & res_columns, Co columns[0]->insert(name); columns[1]->insert(host); columns[2]->insert(port); - columns[3]->insert(index); + if (index) + columns[3]->insert(*index); + else + columns[3]->insertDefault(); columns[4]->insert(connected_time); columns[5]->insert(uptime); columns[6]->insert(zookeeper->expired()); diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index ac091e7cf3c..d674f054632 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -172,19 +172,19 @@ static ExpressionAndSets buildExpressionAndSets(ASTPtr & ast, const NamesAndType /// with subqueries it's possible that new analyzer will be enabled in ::read method /// of underlying storage when all other parts of infra are not ready for it /// (built with old analyzer). - context_copy->setSetting("allow_experimental_analyzer", Field{0}); + context_copy->setSetting("allow_experimental_analyzer", false); auto syntax_analyzer_result = TreeRewriter(context_copy).analyze(ast, columns); ExpressionAnalyzer analyzer(ast, syntax_analyzer_result, context_copy); auto dag = analyzer.getActionsDAG(false); - const auto * col = &dag->findInOutputs(ast->getColumnName()); + const auto * col = &dag.findInOutputs(ast->getColumnName()); if (col->result_name != ttl_string) - col = &dag->addAlias(*col, ttl_string); + col = &dag.addAlias(*col, ttl_string); - dag->getOutputs() = {col}; - dag->removeUnusedActions(); + dag.getOutputs() = {col}; + dag.removeUnusedActions(); - result.expression = std::make_shared(dag, ExpressionActionsSettings::fromContext(context_copy)); + result.expression = std::make_shared(std::move(dag), ExpressionActionsSettings::fromContext(context_copy)); result.sets = analyzer.getPreparedSets(); return result; diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 27c52124e9c..90c2c7f93c1 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -77,15 +77,19 @@ void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context) } } -void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context) +ExpressionActionsPtr buildFilterExpression(ActionsDAG dag, ContextPtr context) +{ + buildSetsForDAG(dag, context); + return std::make_shared(std::move(dag)); +} + +void filterBlockWithExpression(const ExpressionActionsPtr & actions, Block & block) { - buildSetsForDAG(*dag, context); - auto actions = std::make_shared(dag); Block block_with_filter = block; actions->execute(block_with_filter, /*dry_run=*/ false, /*allow_duplicates_in_input=*/ true); /// Filter the block. - String filter_column_name = dag->getOutputs().at(0)->result_name; + String filter_column_name = actions->getActionsDAG().getOutputs().at(0)->result_name; ColumnPtr filter_column = block_with_filter.getByName(filter_column_name).column->convertToFullColumnIfConst(); ConstantFilterDescription constant_filter(*filter_column); @@ -155,7 +159,7 @@ static void addPathAndFileToVirtualColumns(Block & block, const String & path, s block.getByName("_idx").column->assumeMutableRef().insert(idx); } -ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns) +std::optional createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns) { if (!predicate || virtual_columns.empty()) return {}; @@ -171,7 +175,7 @@ ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, con return splitFilterDagForAllowedInputs(predicate, &block); } -ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const ActionsDAGPtr & dag, const NamesAndTypesList & virtual_columns, const ContextPtr & context) +ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const ExpressionActionsPtr & actions, const NamesAndTypesList & virtual_columns) { Block block; for (const auto & column : virtual_columns) @@ -184,7 +188,7 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const for (size_t i = 0; i != paths.size(); ++i) addPathAndFileToVirtualColumns(block, paths[i], i); - filterBlockWithDAG(dag, block, context); + filterBlockWithExpression(actions, block); return block.getByName("_idx").column; } @@ -271,7 +275,8 @@ bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node) static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( const ActionsDAG::Node * node, const Block * allowed_inputs, - ActionsDAG::Nodes & additional_nodes) + ActionsDAG::Nodes & additional_nodes, + bool allow_non_deterministic_functions) { if (node->type == ActionsDAG::ActionType::FUNCTION) { @@ -280,8 +285,14 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( auto & node_copy = additional_nodes.emplace_back(*node); node_copy.children.clear(); for (const auto * child : node->children) - if (const auto * child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes)) + if (const auto * child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_non_deterministic_functions)) node_copy.children.push_back(child_copy); + /// Expression like (now_allowed AND allowed) is not allowed if allow_non_deterministic_functions = true. This is important for + /// trivial count optimization, otherwise we can get incorrect results. For example, if the query is + /// SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1, we cannot apply + /// trivial count. + else if (!allow_non_deterministic_functions) + return nullptr; if (node_copy.children.empty()) return nullptr; @@ -307,7 +318,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( { auto & node_copy = additional_nodes.emplace_back(*node); for (auto & child : node_copy.children) - if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes); !child) + if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_non_deterministic_functions); !child) return nullptr; return &node_copy; @@ -318,10 +329,10 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( { if (const auto * index_hint = typeid_cast(adaptor->getFunction().get())) { - auto index_hint_dag = index_hint->getActions()->clone(); + auto index_hint_dag = index_hint->getActions().clone(); ActionsDAG::NodeRawConstPtrs atoms; - for (const auto & output : index_hint_dag->getOutputs()) - if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes)) + for (const auto & output : index_hint_dag.getOutputs()) + if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes, allow_non_deterministic_functions)) atoms.push_back(child_copy); if (!atoms.empty()) @@ -331,13 +342,13 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( if (atoms.size() > 1) { FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); - res = &index_hint_dag->addFunction(func_builder_and, atoms, {}); + res = &index_hint_dag.addFunction(func_builder_and, atoms, {}); } if (!res->result_type->equals(*node->result_type)) - res = &index_hint_dag->addCast(*res, node->result_type, {}); + res = &index_hint_dag.addCast(*res, node->result_type, {}); - additional_nodes.splice(additional_nodes.end(), ActionsDAG::detachNodes(std::move(*index_hint_dag))); + additional_nodes.splice(additional_nodes.end(), ActionsDAG::detachNodes(std::move(index_hint_dag))); return res; } } @@ -355,24 +366,24 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( return node; } -ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs) +std::optional splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_non_deterministic_functions) { if (!predicate) - return nullptr; + return {}; ActionsDAG::Nodes additional_nodes; - const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes); + const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes, allow_non_deterministic_functions); if (!res) - return nullptr; + return {}; return ActionsDAG::cloneSubDAG({res}, true); } void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context) { - auto dag = splitFilterDagForAllowedInputs(predicate, &block); + auto dag = splitFilterDagForAllowedInputs(predicate, &block, /*allow_non_deterministic_functions=*/ false); if (dag) - filterBlockWithDAG(dag, block, context); + filterBlockWithExpression(buildFilterExpression(std::move(*dag), context), block); } } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 9045a2f5481..73b7908b75c 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -23,7 +23,8 @@ namespace VirtualColumnUtils void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context); /// Just filters block. Block should contain all the required columns. -void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context); +ExpressionActionsPtr buildFilterExpression(ActionsDAG dag, ContextPtr context); +void filterBlockWithExpression(const ExpressionActionsPtr & actions, Block & block); /// Builds sets used by ActionsDAG inplace. void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context); @@ -32,7 +33,15 @@ void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context); bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node); /// Extract a part of predicate that can be evaluated using only columns from input_names. -ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs); +/// When allow_non_deterministic_functions is true then even if the predicate contains non-deterministic +/// functions, we still allow to extract a part of the predicate, otherwise we return nullptr. +/// allow_non_deterministic_functions must be false when we are going to use the result to filter parts in +/// MergeTreeData::totalRowsByPartitionPredicateImp. For example, if the query is +/// `SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1` +/// The predicate will be `_partition_id = '0' AND rowNumberInBlock() = 1`, and `rowNumberInBlock()` is +/// non-deterministic. If we still extract the part `_partition_id = '0'` for filtering parts, then trivial +/// count optimization will be mistakenly applied to the query. +std::optional splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_non_deterministic_functions = true); /// Extract from the input stream a set of `name` column values template @@ -49,14 +58,14 @@ auto extractSingleValueFromBlock(const Block & block, const String & name) NameSet getVirtualNamesForFileLikeStorage(); VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns); -ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns); +std::optional createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns); -ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const ActionsDAGPtr & dag, const NamesAndTypesList & virtual_columns, const ContextPtr & context); +ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const ExpressionActionsPtr & actions, const NamesAndTypesList & virtual_columns); template -void filterByPathOrFile(std::vector & sources, const std::vector & paths, const ActionsDAGPtr & dag, const NamesAndTypesList & virtual_columns, const ContextPtr & context) +void filterByPathOrFile(std::vector & sources, const std::vector & paths, const ExpressionActionsPtr & actions, const NamesAndTypesList & virtual_columns) { - auto indexes_column = getFilterByPathAndFileIndexes(paths, dag, virtual_columns, context); + auto indexes_column = getFilterByPathAndFileIndexes(paths, actions, virtual_columns); const auto & indexes = typeid_cast(*indexes_column).getData(); if (indexes.size() == sources.size()) return; diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index e15da0074d5..65bf6768b1b 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -565,11 +566,11 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) auto syntax_result = TreeRewriter(getContext()).analyze(filter_function, builder.getHeader().getNamesAndTypesList()); auto filter_expression = ExpressionAnalyzer(filter_function, syntax_result, getContext()).getActionsDAG(false); + auto filter_actions = std::make_shared(std::move(filter_expression)); builder.addSimpleTransform([&](const Block & header) { - return std::make_shared( - header, std::make_shared(filter_expression), filter_function->getColumnName(), true); + return std::make_shared(header, filter_actions, filter_function->getColumnName(), true); }); /// Adding window column @@ -594,7 +595,7 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) new_header.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); auto actions = std::make_shared( - convert_actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); + std::move(convert_actions_dag), ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); builder.addSimpleTransform([&](const Block & stream_header) { return std::make_shared(stream_header, actions); @@ -708,7 +709,7 @@ inline void StorageWindowView::fire(UInt32 watermark) getTargetTable()->getInMemoryMetadataPtr()->getColumns(), getContext(), getContext()->getSettingsRef().insert_null_as_default); - auto adding_missing_defaults_actions = std::make_shared(adding_missing_defaults_dag); + auto adding_missing_defaults_actions = std::make_shared(std::move(adding_missing_defaults_dag)); pipe.addSimpleTransform([&](const Block & stream_header) { return std::make_shared(stream_header, adding_missing_defaults_actions); @@ -719,7 +720,7 @@ inline void StorageWindowView::fire(UInt32 watermark) block_io.pipeline.getHeader().getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Position); auto actions = std::make_shared( - convert_actions_dag, + std::move(convert_actions_dag), ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); pipe.addSimpleTransform([&](const Block & stream_header) { @@ -805,7 +806,7 @@ ASTPtr StorageWindowView::getInnerTableCreateQuery(const ASTPtr & inner_query, c { auto column_window = std::make_shared(); column_window->name = window_id_name; - column_window->type = std::make_shared("UInt32"); + column_window->type = makeASTDataType("UInt32"); columns_list->children.push_back(column_window); } @@ -1139,7 +1140,7 @@ void StorageWindowView::read( { auto converting_actions = ActionsDAG::makeConvertingActions( target_header.getColumnsWithTypeAndName(), wv_header.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); - auto converting_step = std::make_unique(query_plan.getCurrentDataStream(), converting_actions); + auto converting_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(converting_actions)); converting_step->setStepDescription("Convert Target table structure to WindowView structure"); query_plan.addStep(std::move(converting_step)); } @@ -1188,6 +1189,7 @@ StorageWindowView::StorageWindowView( ContextPtr context_, const ASTCreateQuery & query, const ColumnsDescription & columns_, + const String & comment, LoadingStrictnessLevel mode) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) @@ -1206,11 +1208,15 @@ StorageWindowView::StorageWindowView( StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); + storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); /// If the target table is not set, use inner target table - has_inner_target_table = query.to_table_id.empty(); - if (has_inner_target_table && !query.storage) + auto to_table_id = query.getTargetTableID(ViewTarget::To); + has_inner_target_table = to_table_id.empty(); + auto to_table_engine = query.getTargetInnerEngine(ViewTarget::To); + + if (has_inner_target_table && !to_table_engine) throw Exception(ErrorCodes::INCORRECT_QUERY, "You must specify where to save results of a WindowView query: " "either ENGINE or an existing table in a TO clause"); @@ -1225,12 +1231,12 @@ StorageWindowView::StorageWindowView( auto inner_query = initInnerQuery(query.select->list_of_selects->children.at(0)->as(), context_); - if (query.inner_storage) - inner_table_engine = query.inner_storage->clone(); + if (auto inner_storage = query.getTargetInnerEngine(ViewTarget::Inner)) + inner_table_engine = inner_storage->clone(); inner_table_id = StorageID(getStorageID().database_name, generateInnerTableName(getStorageID())); inner_fetch_query = generateInnerFetchQuery(inner_table_id); - target_table_id = has_inner_target_table ? StorageID(table_id_.database_name, generateTargetTableName(table_id_)) : query.to_table_id; + target_table_id = has_inner_target_table ? StorageID(table_id_.database_name, generateTargetTableName(table_id_)) : to_table_id; if (is_proctime) next_fire_signal = getWindowUpperBound(now()); @@ -1255,7 +1261,7 @@ StorageWindowView::StorageWindowView( new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); target_create_query->set(target_create_query->columns_list, new_columns_list); - target_create_query->set(target_create_query->storage, query.storage->ptr()); + target_create_query->set(target_create_query->storage, to_table_engine); InterpreterCreateQuery create_interpreter_(target_create_query, create_context_); create_interpreter_.setInternal(true); @@ -1484,7 +1490,7 @@ void StorageWindowView::writeIntoWindowView( pipe.addSimpleTransform([&](const Block & header_) { return std::make_shared( - header_, std::make_shared(filter_expression), + header_, std::make_shared(std::move(filter_expression)), filter_function->getColumnName(), true); }); } @@ -1623,7 +1629,7 @@ void StorageWindowView::writeIntoWindowView( output->getHeader().getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); auto convert_actions = std::make_shared( - convert_actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); + std::move(convert_actions_dag), ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); builder.addSimpleTransform([&](const Block & header_) { return std::make_shared(header_, convert_actions); }); } @@ -1761,7 +1767,7 @@ void registerStorageWindowView(StorageFactory & factory) "Experimental WINDOW VIEW feature " "is not enabled (the setting 'allow_experimental_window_view')"); - return std::make_shared(args.table_id, args.getLocalContext(), args.query, args.columns, args.mode); + return std::make_shared(args.table_id, args.getLocalContext(), args.query, args.columns, args.comment, args.mode); }); } diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 14ac65091d3..38fca512ed9 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -111,6 +111,7 @@ public: ContextPtr context_, const ASTCreateQuery & query, const ColumnsDescription & columns_, + const String & comment, LoadingStrictnessLevel mode); String getName() const override { return "WindowView"; } diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index adc1074b1fe..8f33314397c 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -26,7 +26,6 @@ void registerStorageGenerateRandom(StorageFactory & factory); void registerStorageExecutable(StorageFactory & factory); void registerStorageWindowView(StorageFactory & factory); void registerStorageLoop(StorageFactory & factory); -void registerStorageFuzzQuery(StorageFactory & factory); #if USE_RAPIDJSON || USE_SIMDJSON void registerStorageFuzzJSON(StorageFactory & factory); #endif @@ -127,7 +126,6 @@ void registerStorages() registerStorageExecutable(factory); registerStorageWindowView(factory); registerStorageLoop(factory); - registerStorageFuzzQuery(factory); #if USE_RAPIDJSON || USE_SIMDJSON registerStorageFuzzJSON(factory); #endif diff --git a/src/TableFunctions/ITableFunction.cpp b/src/TableFunctions/ITableFunction.cpp index e5676c5c25d..916ff7ec022 100644 --- a/src/TableFunctions/ITableFunction.cpp +++ b/src/TableFunctions/ITableFunction.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include diff --git a/src/TableFunctions/TableFunctionExecutable.cpp b/src/TableFunctions/TableFunctionExecutable.cpp index 2c3802e8667..cccd3587bc7 100644 --- a/src/TableFunctions/TableFunctionExecutable.cpp +++ b/src/TableFunctions/TableFunctionExecutable.cpp @@ -170,7 +170,14 @@ StoragePtr TableFunctionExecutable::executeImpl(const ASTPtr & /*ast_function*/, if (settings_query != nullptr) settings.applyChanges(settings_query->as()->changes); - auto storage = std::make_shared(storage_id, format, settings, input_queries, getActualTableStructure(context, is_insert_query), ConstraintsDescription{}); + auto storage = std::make_shared( + storage_id, + format, + settings, + input_queries, + getActualTableStructure(context, is_insert_query), + ConstraintsDescription{}, + /* comment = */ ""); storage->startup(); return storage; } diff --git a/src/TableFunctions/TableFunctionFuzzQuery.cpp b/src/TableFunctions/TableFunctionFuzzQuery.cpp deleted file mode 100644 index 224f6666556..00000000000 --- a/src/TableFunctions/TableFunctionFuzzQuery.cpp +++ /dev/null @@ -1,54 +0,0 @@ -#include - -#include -#include -#include -#include - -namespace DB -{ - - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - -void TableFunctionFuzzQuery::parseArguments(const ASTPtr & ast_function, ContextPtr context) -{ - ASTs & args_func = ast_function->children; - - if (args_func.size() != 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments", getName()); - - auto args = args_func.at(0)->children; - configuration = StorageFuzzQuery::getConfiguration(args, context); -} - -StoragePtr TableFunctionFuzzQuery::executeImpl( - const ASTPtr & /*ast_function*/, - ContextPtr context, - const std::string & table_name, - ColumnsDescription /*cached_columns*/, - bool is_insert_query) const -{ - ColumnsDescription columns = getActualTableStructure(context, is_insert_query); - auto res = std::make_shared( - StorageID(getDatabaseName(), table_name), - columns, - /* comment */ String{}, - configuration); - res->startup(); - return res; -} - -void registerTableFunctionFuzzQuery(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description = "Perturbs a query string with random variations.", - .returned_value = "A table object with a single column containing perturbed query strings."}, - .allow_readonly = true}); -} - -} diff --git a/src/TableFunctions/TableFunctionFuzzQuery.h b/src/TableFunctions/TableFunctionFuzzQuery.h deleted file mode 100644 index 22d10341c4d..00000000000 --- a/src/TableFunctions/TableFunctionFuzzQuery.h +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once - -#include - -#include -#include -#include - -#include "config.h" - -namespace DB -{ - -class TableFunctionFuzzQuery : public ITableFunction -{ -public: - static constexpr auto name = "fuzzQuery"; - std::string getName() const override { return name; } - - void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - - ColumnsDescription getActualTableStructure(ContextPtr /* context */, bool /* is_insert_query */) const override - { - return ColumnsDescription{{"query", std::make_shared()}}; - } - -private: - StoragePtr executeImpl( - const ASTPtr & ast_function, - ContextPtr context, - const std::string & table_name, - ColumnsDescription cached_columns, - bool is_insert_query) const override; - - const char * getStorageTypeName() const override { return "fuzzQuery"; } - - String source; - std::optional random_seed; - StorageFuzzQuery::Configuration configuration; -}; - -} diff --git a/src/TableFunctions/TableFunctionSQLite.cpp b/src/TableFunctions/TableFunctionSQLite.cpp index e367e05bf73..87353025d1d 100644 --- a/src/TableFunctions/TableFunctionSQLite.cpp +++ b/src/TableFunctions/TableFunctionSQLite.cpp @@ -57,7 +57,7 @@ StoragePtr TableFunctionSQLite::executeImpl(const ASTPtr & /*ast_function*/, sqlite_db, database_path, remote_table_name, - cached_columns, ConstraintsDescription{}, context); + cached_columns, ConstraintsDescription{}, /* comment = */ "", context); storage->startup(); return storage; diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index a6c90872f12..ca4913898f9 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -26,7 +26,6 @@ void registerTableFunctions() registerTableFunctionMongoDB(factory); registerTableFunctionRedis(factory); registerTableFunctionMergeTreeIndex(factory); - registerTableFunctionFuzzQuery(factory); #if USE_RAPIDJSON || USE_SIMDJSON registerTableFunctionFuzzJSON(factory); #endif diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index 2a8864a9bfd..efde4d6dcdc 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -23,7 +23,6 @@ void registerTableFunctionGenerate(TableFunctionFactory & factory); void registerTableFunctionMongoDB(TableFunctionFactory & factory); void registerTableFunctionRedis(TableFunctionFactory & factory); void registerTableFunctionMergeTreeIndex(TableFunctionFactory & factory); -void registerTableFunctionFuzzQuery(TableFunctionFactory & factory); #if USE_RAPIDJSON || USE_SIMDJSON void registerTableFunctionFuzzJSON(TableFunctionFactory & factory); #endif diff --git a/src/configure_config.cmake b/src/configure_config.cmake index 75f61baa854..d22bf674df4 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -173,5 +173,8 @@ endif() if (TARGET ch_contrib::prometheus_protobufs) set(USE_PROMETHEUS_PROTOBUFS 1) endif() +if (TARGET ch_contrib::numactl) + set(USE_NUMACTL 1) +endif() set(SOURCE_DIR ${PROJECT_SOURCE_DIR}) diff --git a/tests/ci/artifactory.py b/tests/ci/artifactory.py index 98a0345c6bd..86dcaf79854 100644 --- a/tests/ci/artifactory.py +++ b/tests/ci/artifactory.py @@ -43,7 +43,6 @@ class R2MountPoint: self.bucket_name = self._PROD_BUCKET_NAME self.aux_mount_options = "" - self.async_mount = False if self.app == MountPointApp.S3FS: self.cache_dir = "/home/ubuntu/s3fs_cache" # self.aux_mount_options += "-o nomodtime " if self.NOMODTIME else "" not for s3fs @@ -57,7 +56,6 @@ class R2MountPoint: self.mount_cmd = f"s3fs {self.bucket_name} {self.MOUNT_POINT} -o url={self.API_ENDPOINT} -o use_path_request_style -o umask=0000 -o nomultipart -o logfile={self.LOG_FILE} {self.aux_mount_options}" elif self.app == MountPointApp.RCLONE: # run rclone mount process asynchronously, otherwise subprocess.run(daemonized command) will not return - self.async_mount = True self.cache_dir = "/home/ubuntu/rclone_cache" self.aux_mount_options += "--no-modtime " if self.NOMODTIME else "" self.aux_mount_options += "-v " if self.DEBUG else "" # -vv too verbose @@ -85,10 +83,12 @@ class R2MountPoint: Shell.run(_UNMOUNT_CMD) Shell.run(_MKDIR_CMD) Shell.run(_MKDIR_FOR_CACHE) - # didn't manage to use simple run() and not block or fail - Shell.run_as_daemon(self.mount_cmd) - if self.async_mount: - time.sleep(3) + if self.app == MountPointApp.S3FS: + Shell.run(self.mount_cmd, check=True) + else: + # didn't manage to use simple run() and without blocking or failure + Shell.run_as_daemon(self.mount_cmd) + time.sleep(3) Shell.run(_TEST_MOUNT_CMD, check=True) @classmethod @@ -107,6 +107,7 @@ class DebianArtifactory: _PROD_REPO_URL = "https://packages.clickhouse.com/deb" def __init__(self, release_info: ReleaseInfo, dry_run: bool): + self.release_info = release_info self.codename = release_info.codename self.version = release_info.version if dry_run: @@ -154,9 +155,8 @@ class DebianArtifactory: print("Running test command:") print(f" {cmd}") Shell.run(cmd, check=True) - release_info = ReleaseInfo.from_file() - release_info.debian_command = debian_command - release_info.dump() + self.release_info.debian_command = debian_command + self.release_info.dump() def _copy_if_not_exists(src: Path, dst: Path) -> Path: @@ -177,6 +177,7 @@ class RpmArtifactory: _SIGN_KEY = "885E2BDCF96B0B45ABF058453E4AD4719DDE9A38" def __init__(self, release_info: ReleaseInfo, dry_run: bool): + self.release_info = release_info self.codename = release_info.codename self.version = release_info.version if dry_run: @@ -230,9 +231,8 @@ class RpmArtifactory: print("Running test command:") print(f" {cmd}") Shell.run(cmd, check=True) - release_info = ReleaseInfo.from_file() - release_info.rpm_command = rpm_command - release_info.dump() + self.release_info.rpm_command = rpm_command + self.release_info.dump() class TgzArtifactory: @@ -240,6 +240,7 @@ class TgzArtifactory: _PROD_REPO_URL = "https://packages.clickhouse.com/tgz" def __init__(self, release_info: ReleaseInfo, dry_run: bool): + self.release_info = release_info self.codename = release_info.codename self.version = release_info.version if dry_run: @@ -290,9 +291,8 @@ class TgzArtifactory: expected_checksum == actual_checksum ), f"[{actual_checksum} != {expected_checksum}]" Shell.run("rm /tmp/tmp.tgz*") - release_info = ReleaseInfo.from_file() - release_info.tgz_command = cmd - release_info.dump() + self.release_info.tgz_command = cmd + self.release_info.dump() def parse_args() -> argparse.Namespace: @@ -340,9 +340,7 @@ def parse_args() -> argparse.Namespace: if __name__ == "__main__": args = parse_args() - assert args.dry_run - release_info = ReleaseInfo.from_file() """ Use S3FS. RCLONE has some errors with r2 remote which I didn't figure out how to resolve: ERROR : IO error: NotImplemented: versionId not implemented @@ -350,26 +348,38 @@ if __name__ == "__main__": """ mp = R2MountPoint(MountPointApp.S3FS, dry_run=args.dry_run) if args.export_debian: - with ReleaseContextManager(release_progress=ReleaseProgress.EXPORT_DEB) as _: + with ReleaseContextManager( + release_progress=ReleaseProgress.EXPORT_DEB + ) as release_info: mp.init() DebianArtifactory(release_info, dry_run=args.dry_run).export_packages() mp.teardown() if args.export_rpm: - with ReleaseContextManager(release_progress=ReleaseProgress.EXPORT_RPM) as _: + with ReleaseContextManager( + release_progress=ReleaseProgress.EXPORT_RPM + ) as release_info: mp.init() RpmArtifactory(release_info, dry_run=args.dry_run).export_packages() mp.teardown() if args.export_tgz: - with ReleaseContextManager(release_progress=ReleaseProgress.EXPORT_TGZ) as _: + with ReleaseContextManager( + release_progress=ReleaseProgress.EXPORT_TGZ + ) as release_info: mp.init() TgzArtifactory(release_info, dry_run=args.dry_run).export_packages() mp.teardown() if args.test_debian: - with ReleaseContextManager(release_progress=ReleaseProgress.TEST_DEB) as _: + with ReleaseContextManager( + release_progress=ReleaseProgress.TEST_DEB + ) as release_info: DebianArtifactory(release_info, dry_run=args.dry_run).test_packages() if args.test_tgz: - with ReleaseContextManager(release_progress=ReleaseProgress.TEST_TGZ) as _: + with ReleaseContextManager( + release_progress=ReleaseProgress.TEST_TGZ + ) as release_info: TgzArtifactory(release_info, dry_run=args.dry_run).test_packages() if args.test_rpm: - with ReleaseContextManager(release_progress=ReleaseProgress.TEST_RPM) as _: + with ReleaseContextManager( + release_progress=ReleaseProgress.TEST_RPM + ) as release_info: RpmArtifactory(release_info, dry_run=args.dry_run).test_packages() diff --git a/tests/ci/auto_release.py b/tests/ci/auto_release.py index 39ab3156c80..f2386fe207f 100644 --- a/tests/ci/auto_release.py +++ b/tests/ci/auto_release.py @@ -191,7 +191,7 @@ def main(): title=f"Auto Release Status for {release_info.release_branch}", body=release_info.to_dict(), ) - if args.post_auto_release_complete: + elif args.post_auto_release_complete: assert args.wf_status, "--wf-status Required with --post-auto-release-complete" if args.wf_status != SUCCESS: CIBuddy(dry_run=False).post_job_error( diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py index a7fc6d02853..0b2aa9a2d35 100644 --- a/tests/ci/cherry_pick.py +++ b/tests/ci/cherry_pick.py @@ -420,7 +420,12 @@ class Backport: fetch_release_prs = self.gh.get_release_pulls(self._fetch_from) fetch_release_branches = [pr.head.ref for pr in fetch_release_prs] self.labels_to_backport = [ - f"v{branch}-must-backport" for branch in fetch_release_branches + ( + f"v{branch}-must-backport" + if self._repo_name == "ClickHouse/ClickHouse" + else f"v{branch.replace('release/','')}-must-backport" + ) + for branch in fetch_release_branches ] logging.info("Fetching from %s", self._fetch_from) @@ -490,17 +495,23 @@ class Backport: def process_pr(self, pr: PullRequest) -> None: pr_labels = [label.name for label in pr.labels] - if ( - any(label in pr_labels for label in self.must_create_backport_labels) - or self._repo_name != self._fetch_from - ): + if any(label in pr_labels for label in self.must_create_backport_labels): branches = [ ReleaseBranch(br, pr, self.repo, self.backport_created_label) for br in self.release_branches ] # type: List[ReleaseBranch] else: branches = [ - ReleaseBranch(br, pr, self.repo, self.backport_created_label) + ReleaseBranch( + ( + br + if self._repo_name == "ClickHouse/Clickhouse" + else f"release/{br}" + ), + pr, + self.repo, + self.backport_created_label, + ) for br in [ label.split("-", 1)[0][1:] # v21.8-must-backport for label in pr_labels diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 171819e2632..935fe472e50 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -95,6 +95,12 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: action="store_true", help="Action that configures ci run. Calculates digests, checks job to be executed, generates json output", ) + parser.add_argument( + "--workflow", + default="", + type=str, + help="Workflow Name, to be provided with --configure for workflow-specific CI runs", + ) parser.add_argument( "--update-gh-statuses", action="store_true", @@ -287,7 +293,10 @@ def _pre_action(s3, job_name, batch, indata, pr_info): # for release/master branches reports must be from the same branch report_prefix = "" if pr_info.is_master or pr_info.is_release: - report_prefix = normalize_string(pr_info.head_ref) + # do not set report prefix for scheduled or dispatched wf (in case it started from feature branch while + # testing), otherwise reports won't be found + if not (pr_info.is_scheduled or pr_info.is_dispatched): + report_prefix = normalize_string(pr_info.head_ref) print( f"Use report prefix [{report_prefix}], pr_num [{pr_info.number}], head_ref [{pr_info.head_ref}]" ) @@ -520,6 +529,7 @@ def _configure_jobs( pr_info: PRInfo, ci_settings: CiSettings, skip_jobs: bool, + workflow_name: str = "", dry_run: bool = False, ) -> CiCache: """ @@ -537,18 +547,27 @@ def _configure_jobs( is_docs_only=pr_info.has_changes_in_documentation_only(), is_master=pr_info.is_master, is_pr=pr_info.is_pr, + workflow_name=workflow_name, ) else: job_configs = {} - # filter jobs in accordance with ci settings - job_configs = ci_settings.apply( - job_configs, - pr_info.is_release, - is_pr=pr_info.is_pr, - is_mq=pr_info.is_merge_queue, - labels=pr_info.labels, - ) + if not workflow_name: + # filter jobs in accordance with ci settings + job_configs = ci_settings.apply( + job_configs, + pr_info.is_release, + is_pr=pr_info.is_pr, + is_mq=pr_info.is_merge_queue, + labels=pr_info.labels, + ) + + # add all job batches to job's to_do batches + for _job, job_config in job_configs.items(): + batches = [] + for batch in range(job_config.num_batches): + batches.append(batch) + job_config.batches = batches # check jobs in ci cache ci_cache = CiCache.calc_digests_and_create( @@ -747,7 +766,9 @@ def _upload_build_artifacts( int(job_report.duration), GITHUB_JOB_API_URL(), head_ref=pr_info.head_ref, - pr_number=pr_info.number, + # PRInfo fetches pr number for release branches as well - set pr_number to 0 for release + # so that build results are not mistakenly treated as feature branch builds + pr_number=pr_info.number if pr_info.is_pr else 0, ) report_url = ci_cache.upload_build_report(build_result) print(f"Report file has been uploaded to [{report_url}]") @@ -1102,6 +1123,7 @@ def main() -> int: pr_info, ci_settings, args.skip_jobs, + args.workflow, ) ci_cache.print_status() @@ -1111,7 +1133,7 @@ def main() -> int: if IS_CI and not pr_info.is_merge_queue: - if pr_info.is_release: + if pr_info.is_release and pr_info.is_push_event: print("Release/master: CI Cache add pending records for all todo jobs") ci_cache.push_pending_all(pr_info.is_release) diff --git a/tests/ci/ci_buddy.py b/tests/ci/ci_buddy.py index 3eba5532e66..dfb5885270a 100644 --- a/tests/ci/ci_buddy.py +++ b/tests/ci/ci_buddy.py @@ -1,3 +1,4 @@ +import argparse import json import os from typing import Union, Dict @@ -7,7 +8,7 @@ import requests from botocore.exceptions import ClientError from pr_info import PRInfo -from ci_utils import Shell +from ci_utils import Shell, GHActions class CIBuddy: @@ -29,6 +30,12 @@ class CIBuddy: self.commit_url = pr_info.commit_html_url self.sha = pr_info.sha[:10] + def check_workflow(self): + GHActions.print_workflow_results() + res = GHActions.get_workflow_job_result(GHActions.ActionsNames.RunConfig) + if res != GHActions.ActionStatuses.SUCCESS: + self.post_job_error("Workflow Configuration Failed", critical=True) + @staticmethod def _get_webhooks(): name = "ci_buddy_web_hooks" @@ -139,7 +146,30 @@ class CIBuddy: self.post(message) +def parse_args(): + parser = argparse.ArgumentParser("CI Buddy bot notifies about CI events") + parser.add_argument( + "--check-wf-status", + action="store_true", + help="Checks workflow status", + ) + parser.add_argument( + "--test", + action="store_true", + help="for test and debug", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="dry run mode", + ) + return parser.parse_args(), parser + + if __name__ == "__main__": - # test - buddy = CIBuddy(dry_run=True) - buddy.post_job_error("TEst") + args, parser = parse_args() + + if args.test: + CIBuddy(dry_run=True).post_job_error("TEst") + elif args.check_wf_status: + CIBuddy(dry_run=args.dry_run).check_workflow() diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index a44b15f34c1..a7df884a091 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -37,11 +37,22 @@ class CI: from ci_utils import GHActions as GHActions from ci_definitions import Labels as Labels from ci_definitions import TRUSTED_CONTRIBUTORS as TRUSTED_CONTRIBUTORS + from ci_definitions import WorkFlowNames as WorkFlowNames from ci_utils import CATEGORY_TO_LABEL as CATEGORY_TO_LABEL # Jobs that run for doc related updates _DOCS_CHECK_JOBS = [JobNames.DOCS_CHECK, JobNames.STYLE_CHECK] + WORKFLOW_CONFIGS = { + WorkFlowNames.JEPSEN: LabelConfig( + run_jobs=[ + BuildNames.BINARY_RELEASE, + JobNames.JEPSEN_KEEPER, + JobNames.JEPSEN_SERVER, + ] + ) + } # type: Dict[str, LabelConfig] + TAG_CONFIGS = { Tags.DO_NOT_TEST_LABEL: LabelConfig(run_jobs=[JobNames.STYLE_CHECK]), Tags.CI_SET_ARM: LabelConfig( @@ -68,7 +79,7 @@ class CI: JobNames.STATEFUL_TEST_ASAN, ] ), - } + } # type: Dict[str, LabelConfig] JOB_CONFIGS: Dict[str, JobConfig] = { BuildNames.PACKAGE_RELEASE: CommonJobConfigs.BUILD.with_properties( @@ -508,10 +519,10 @@ class CI: runner_type=Runners.STYLE_CHECKER, ), JobNames.DOCKER_SERVER: CommonJobConfigs.DOCKER_SERVER.with_properties( - required_builds=[BuildNames.PACKAGE_RELEASE] + required_builds=[BuildNames.PACKAGE_RELEASE, BuildNames.PACKAGE_AARCH64] ), JobNames.DOCKER_KEEPER: CommonJobConfigs.DOCKER_SERVER.with_properties( - required_builds=[BuildNames.PACKAGE_RELEASE] + required_builds=[BuildNames.PACKAGE_RELEASE, BuildNames.PACKAGE_AARCH64] ), JobNames.DOCS_CHECK: JobConfig( digest=DigestConfig( @@ -576,10 +587,10 @@ class CI: if job_name in REQUIRED_CHECKS: stage_type = WorkflowStages.TESTS_1 else: - stage_type = WorkflowStages.TESTS_3 + stage_type = WorkflowStages.TESTS_2 assert stage_type, f"BUG [{job_name}]" - if non_blocking_ci and stage_type == WorkflowStages.TESTS_3: - stage_type = WorkflowStages.TESTS_2 + if non_blocking_ci and stage_type == WorkflowStages.TESTS_2: + stage_type = WorkflowStages.TESTS_2_WW return stage_type @classmethod @@ -599,16 +610,25 @@ class CI: @classmethod def get_workflow_jobs_with_configs( - cls, is_mq: bool, is_docs_only: bool, is_master: bool, is_pr: bool + cls, + is_mq: bool, + is_docs_only: bool, + is_master: bool, + is_pr: bool, + workflow_name: str, ) -> Dict[str, JobConfig]: """ get a list of all jobs for a workflow with configs """ - jobs = [] if is_mq: jobs = MQ_JOBS elif is_docs_only: jobs = cls._DOCS_CHECK_JOBS + elif workflow_name: + assert ( + workflow_name in cls.WORKFLOW_CONFIGS + ), "Workflow name if provided must be configured in WORKFLOW_CONFIGS" + jobs = list(cls.WORKFLOW_CONFIGS[workflow_name].run_jobs) else: # add all jobs jobs = list(cls.JOB_CONFIGS) diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index a8d9793f1d3..054b554b8fa 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -67,10 +67,10 @@ class WorkflowStages(metaclass=WithIter): BUILDS_2 = "Builds_2" # all tests required for merge TESTS_1 = "Tests_1" - # not used atm - TESTS_2 = "Tests_2" + # used in woolenwolfdog mode + TESTS_2_WW = "Tests_2_ww" # all tests not required for merge - TESTS_3 = "Tests_3" + TESTS_2 = "Tests_2" class Runners(metaclass=WithIter): @@ -106,6 +106,14 @@ class Tags(metaclass=WithIter): libFuzzer = "libFuzzer" +class WorkFlowNames(metaclass=WithIter): + """ + CI WorkFlow Names for custom CI runs + """ + + JEPSEN = "JepsenWorkflow" + + class BuildNames(metaclass=WithIter): """ Build' job names diff --git a/tests/ci/ci_settings.py b/tests/ci/ci_settings.py index c29c5777dba..d6e9765ceb7 100644 --- a/tests/ci/ci_settings.py +++ b/tests/ci/ci_settings.py @@ -234,11 +234,4 @@ class CiSettings: for job in add_parents: res[job] = job_configs[job] - for job, job_config in res.items(): - batches = [] - for batch in range(job_config.num_batches): - if not self.job_batches or batch in self.job_batches: - batches.append(batch) - job_config.batches = batches - return res diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index efbf014cd52..447aac74c7f 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -1,3 +1,4 @@ +import json import os import re import subprocess @@ -11,6 +12,9 @@ import requests class Envs: GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse") + WORKFLOW_RESULT_FILE = os.getenv( + "WORKFLOW_RESULT_FILE", "/tmp/workflow_results.json" + ) LABEL_CATEGORIES = { @@ -79,6 +83,47 @@ def normalize_string(string: str) -> str: class GHActions: + class ActionsNames: + RunConfig = "RunConfig" + + class ActionStatuses: + ERROR = "error" + FAILURE = "failure" + PENDING = "pending" + SUCCESS = "success" + + @classmethod + def _get_workflow_results(cls): + if not Path(Envs.WORKFLOW_RESULT_FILE).exists(): + print( + f"ERROR: Failed to get workflow results from file [{Envs.WORKFLOW_RESULT_FILE}]" + ) + return {} + with open(Envs.WORKFLOW_RESULT_FILE, "r", encoding="utf-8") as json_file: + try: + res = json.load(json_file) + except json.JSONDecodeError as e: + print(f"ERROR: json decoder exception {e}") + json_file.seek(0) + print(" File content:") + print(json_file.read()) + return {} + return res + + @classmethod + def print_workflow_results(cls): + res = cls._get_workflow_results() + results = [f"{job}: {data['result']}" for job, data in res.items()] + cls.print_in_group("Workflow results", results) + + @classmethod + def get_workflow_job_result(cls, wf_job_name: str) -> Optional[str]: + res = cls._get_workflow_results() + if wf_job_name in res: + return res[wf_job_name]["result"] # type: ignore + else: + return None + @staticmethod def print_in_group(group_name: str, lines: Union[Any, List[Any]]) -> None: lines = list(lines) @@ -167,7 +212,7 @@ class Shell: return res.stdout.strip() @classmethod - def run(cls, command, check=False, dry_run=False): + def run(cls, command, check=False, dry_run=False, **kwargs): if dry_run: print(f"Dry-ryn. Would run command [{command}]") return "" @@ -180,12 +225,14 @@ class Shell: stderr=subprocess.PIPE, text=True, check=False, + **kwargs, ) if result.returncode == 0: + print(f"stdout: {result.stdout.strip()}") res = result.stdout else: print( - f"ERROR: stdout {result.stdout.strip()}, stderr {result.stderr.strip()}" + f"ERROR: stdout: {result.stdout.strip()}, stderr: {result.stderr.strip()}" ) if check: assert result.returncode == 0 diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index 91ea5c6d5d3..287970cce9a 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -197,6 +197,10 @@ def get_instance_id(): return _query_imds("latest/meta-data/instance-id") +def get_instance_lifecycle(): + return _query_imds("latest/meta-data/instance-life-cycle") + + def prepare_tests_results_for_clickhouse( pr_info: PRInfo, test_results: TestResults, @@ -233,7 +237,7 @@ def prepare_tests_results_for_clickhouse( "head_ref": head_ref, "head_repo": head_repo, "task_url": pr_info.task_url, - "instance_type": get_instance_type(), + "instance_type": ",".join([get_instance_type(), get_instance_lifecycle()]), "instance_id": get_instance_id(), } diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index 4347cfebb54..a0b4083b673 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -43,6 +43,7 @@ class ReleaseProgress: TEST_TGZ = "test TGZ packages" TEST_RPM = "test RPM packages" TEST_DEB = "test DEB packages" + COMPLETED = "completed" class ReleaseProgressDescription: @@ -108,6 +109,12 @@ class ReleaseInfo: release_progress: str = "" progress_description: str = "" + def is_patch(self): + return self.release_branch != "master" + + def is_new_release_branch(self): + return self.release_branch == "master" + @staticmethod def from_file() -> "ReleaseInfo": with open(RELEASE_INFO_FILE, "r", encoding="utf-8") as json_file: @@ -126,12 +133,12 @@ class ReleaseInfo: release_tag = None previous_release_tag = None previous_release_sha = None - codename = None + codename = "" assert release_type in ("patch", "new") if release_type == "new": # check commit_ref is right and on a right branch Shell.run( - f"git merge-base --is-ancestor origin/{commit_ref} origin/master", + f"git merge-base --is-ancestor {commit_ref} origin/master", check=True, ) with checkout(commit_ref): @@ -146,9 +153,6 @@ class ReleaseInfo: git.latest_tag == expected_prev_tag ), f"BUG: latest tag [{git.latest_tag}], expected [{expected_prev_tag}]" release_tag = version.describe - codename = ( - VersionType.STABLE - ) # dummy value (artifactory won't be updated for new release) previous_release_tag = expected_prev_tag previous_release_sha = Shell.run_strict( f"git rev-parse {previous_release_tag}" @@ -205,7 +209,7 @@ class ReleaseInfo: and commit_sha and release_tag and version - and codename in ("lts", "stable") + and (codename in ("lts", "stable") or release_type == "new") ) self.release_branch = release_branch @@ -320,24 +324,27 @@ class ReleaseInfo: Shell.run( f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'" ) - self.version_bump_pr = GHActions.get_pr_url_by_branch( - repo=GITHUB_REPOSITORY, branch=branch_upd_version_contributors - ) + self.version_bump_pr = "dry-run" + else: + self.version_bump_pr = GHActions.get_pr_url_by_branch( + repo=GITHUB_REPOSITORY, branch=branch_upd_version_contributors + ) def update_release_info(self, dry_run: bool) -> "ReleaseInfo": - branch = f"auto/{release_info.release_tag}" - if not dry_run: - url = GHActions.get_pr_url_by_branch(repo=GITHUB_REPOSITORY, branch=branch) - else: - url = "dry-run" - - print(f"ChangeLog PR url [{url}]") - self.changelog_pr = url - print(f"Release url [{url}]") - self.release_url = ( - f"https://github.com/{GITHUB_REPOSITORY}/releases/tag/{self.release_tag}" - ) - self.docker_command = f"docker run --rm clickhouse/clickhouse:{self.release_branch} clickhouse --version" + if self.release_branch != "master": + branch = f"auto/{release_info.release_tag}" + if not dry_run: + url = GHActions.get_pr_url_by_branch( + repo=GITHUB_REPOSITORY, branch=branch + ) + else: + url = "dry-run" + print(f"ChangeLog PR url [{url}]") + self.changelog_pr = url + print(f"Release url [{url}]") + self.release_url = f"https://github.com/{GITHUB_REPOSITORY}/releases/tag/{self.release_tag}" + if self.release_progress == ReleaseProgress.COMPLETED: + self.docker_command = f"docker run --rm clickhouse/clickhouse:{self.version} clickhouse --version" self.dump() return self @@ -712,13 +719,22 @@ if __name__ == "__main__": if args.post_status: release_info = ReleaseInfo.from_file() release_info.update_release_info(dry_run=args.dry_run) - if release_info.debian_command: + if release_info.is_new_release_branch(): + title = "New release branch" + else: + title = "New release" + if ( + release_info.progress_description == ReleaseProgressDescription.OK + and release_info.release_progress == ReleaseProgress.COMPLETED + ): + title = "Completed: " + title CIBuddy(dry_run=args.dry_run).post_done( - f"New release issued", dataclasses.asdict(release_info) + title, dataclasses.asdict(release_info) ) else: + title = "Failed: " + title CIBuddy(dry_run=args.dry_run).post_critical( - f"Failed to issue new release", dataclasses.asdict(release_info) + title, dataclasses.asdict(release_info) ) if args.set_progress_started: diff --git a/tests/ci/docker_images_helper.py b/tests/ci/docker_images_helper.py index 6ea679e0597..e6869852c4e 100644 --- a/tests/ci/docker_images_helper.py +++ b/tests/ci/docker_images_helper.py @@ -3,12 +3,12 @@ import json import logging import os -import subprocess from pathlib import Path from typing import Any, Dict, List, Optional from env_helper import ROOT_DIR, DOCKER_TAG from get_robot_token import get_parameter_from_ssm +from ci_utils import Shell IMAGES_FILE_PATH = Path("docker/images.json") @@ -16,20 +16,14 @@ ImagesDict = Dict[str, dict] def docker_login(relogin: bool = True) -> None: - if ( - relogin - or subprocess.run( # pylint: disable=unexpected-keyword-arg - "docker system info | grep --quiet -E 'Username|Registry'", - shell=True, - check=False, - ).returncode - == 1 + if relogin or not Shell.check( + "docker system info | grep --quiet -E 'Username|Registry'" ): - subprocess.check_output( # pylint: disable=unexpected-keyword-arg + Shell.run( # pylint: disable=unexpected-keyword-arg "docker login --username 'robotclickhouse' --password-stdin", input=get_parameter_from_ssm("dockerhub_robot_password"), encoding="utf-8", - shell=True, + check=True, ) @@ -48,14 +42,10 @@ class DockerImage: def pull_image(image: DockerImage) -> DockerImage: try: logging.info("Pulling image %s - start", image) - subprocess.check_output( - f"docker pull {image}", - stderr=subprocess.STDOUT, - shell=True, - ) + Shell.run(f"docker pull {image}", check=True) logging.info("Pulling image %s - done", image) except Exception as ex: - logging.info("Got execption pulling docker %s", ex) + logging.info("Got exception pulling docker %s", ex) raise ex return image diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 21fc02ce02a..3e782c079c6 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -21,7 +21,7 @@ from env_helper import ( TEMP_PATH, ) from git_helper import Git -from pr_info import PRInfo, EventType +from pr_info import PRInfo from report import FAILURE, SUCCESS, JobReport, TestResult, TestResults from stopwatch import Stopwatch from tee_popen import TeePopen @@ -128,17 +128,9 @@ def parse_args() -> argparse.Namespace: def retry_popen(cmd: str, log_file: Path) -> int: max_retries = 2 - for retry in range(max_retries): - # From time to time docker build may failed. Curl issues, or even push - # It will sleep progressively 5, 15, 30 and 50 seconds between retries - progressive_sleep = 5 * sum(i + 1 for i in range(retry)) - if progressive_sleep: - logging.warning( - "The following command failed, sleep %s before retry: %s", - progressive_sleep, - cmd, - ) - time.sleep(progressive_sleep) + sleep_seconds = 10 + retcode = -1 + for _retry in range(max_retries): with TeePopen( cmd, log_file=log_file, @@ -146,7 +138,14 @@ def retry_popen(cmd: str, log_file: Path) -> int: retcode = process.wait() if retcode == 0: return 0 - + else: + # From time to time docker build may failed. Curl issues, or even push + logging.error( + "The following command failed, sleep %s before retry: %s", + sleep_seconds, + cmd, + ) + time.sleep(sleep_seconds) return retcode @@ -375,25 +374,8 @@ def main(): tags = gen_tags(args.version, args.release_type) repo_urls = {} direct_urls: Dict[str, List[str]] = {} - if pr_info.event_type == EventType.PULL_REQUEST: - release_or_pr = str(pr_info.number) - sha = pr_info.sha - elif pr_info.event_type == EventType.PUSH and pr_info.is_master: - release_or_pr = str(0) - sha = pr_info.sha - else: - release_or_pr = f"{args.version.major}.{args.version.minor}" - sha = args.sha - assert sha for arch, build_name in zip(ARCH, ("package_release", "package_aarch64")): - if not args.bucket_prefix: - repo_urls[arch] = ( - f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/" - f"{release_or_pr}/{sha}/{build_name}" - ) - else: - repo_urls[arch] = f"{args.bucket_prefix}/{build_name}" if args.allow_build_reuse: # read s3 urls from pre-downloaded build reports if "clickhouse-server" in image_repo: @@ -415,6 +397,21 @@ def main(): for url in urls if any(package in url for package in PACKAGES) and "-dbg" not in url ] + elif args.bucket_prefix: + assert not args.allow_build_reuse + repo_urls[arch] = f"{args.bucket_prefix}/{build_name}" + print(f"Bucket prefix is set: Fetching packages from [{repo_urls}]") + elif args.sha: + version = args.version + repo_urls[arch] = ( + f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/" + f"{version.major}.{version.minor}/{args.sha}/{build_name}" + ) + print(f"Fetching packages from [{repo_urls}]") + else: + assert ( + False + ), "--sha, --bucket_prefix or --allow-build-reuse (to fetch packages from build report) must be provided" if push: docker_login() @@ -431,7 +428,6 @@ def main(): ) if test_results[-1].status != "OK": status = FAILURE - pr_info = pr_info or PRInfo() description = f"Processed tags: {', '.join(tags)}" JobReport( diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 21f16d995a4..2b348be8b51 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -29,7 +29,8 @@ CLICKHOUSE_BINARY_PATH = "usr/bin/clickhouse" CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH = "usr/bin/clickhouse-odbc-bridge" CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH = "usr/bin/clickhouse-library-bridge" -FLAKY_TRIES_COUNT = 10 +FLAKY_TRIES_COUNT = 10 # run whole pytest several times +FLAKY_REPEAT_COUNT = 5 # runs test case in single module several times MAX_TIME_SECONDS = 3600 MAX_TIME_IN_SANDBOX = 20 * 60 # 20 minutes @@ -568,6 +569,7 @@ class ClickhouseIntegrationTestsRunner: tests_in_group, num_tries, num_workers, + repeat_count, ): try: return self.run_test_group( @@ -576,6 +578,7 @@ class ClickhouseIntegrationTestsRunner: tests_in_group, num_tries, num_workers, + repeat_count, ) except Exception as e: logging.info("Failed to run %s:\n%s", test_group, e) @@ -598,6 +601,7 @@ class ClickhouseIntegrationTestsRunner: tests_in_group, num_tries, num_workers, + repeat_count, ): counters = { "ERROR": [], @@ -639,6 +643,7 @@ class ClickhouseIntegrationTestsRunner: test_cmd = " ".join([shlex.quote(test) for test in sorted(test_names)]) parallel_cmd = f" --parallel {num_workers} " if num_workers > 0 else "" + repeat_cmd = f" --count {repeat_count} " if repeat_count > 0 else "" # -r -- show extra test summary: # -f -- (f)ailed # -E -- (E)rror @@ -647,7 +652,7 @@ class ClickhouseIntegrationTestsRunner: cmd = ( f"cd {repo_path}/tests/integration && " f"timeout --signal=KILL 1h ./runner {self._get_runner_opts()} " - f"{image_cmd} -t {test_cmd} {parallel_cmd} -- -rfEps --run-id={i} " + f"{image_cmd} -t {test_cmd} {parallel_cmd} {repeat_cmd} -- -rfEps --run-id={i} " f"--color=no --durations=0 {_get_deselect_option(self.should_skip_tests())} " f"| tee {info_path}" ) @@ -784,7 +789,12 @@ class ClickhouseIntegrationTestsRunner: final_retry += 1 logging.info("Running tests for the %s time", i) counters, tests_times, log_paths = self.try_run_test_group( - repo_path, "bugfix" if should_fail else "flaky", tests_to_run, 1, 1 + repo_path, + "bugfix" if should_fail else "flaky", + tests_to_run, + 1, + 1, + FLAKY_REPEAT_COUNT, ) logs += log_paths if counters["FAILED"]: @@ -919,7 +929,7 @@ class ClickhouseIntegrationTestsRunner: for group, tests in items_to_run: logging.info("Running test group %s containing %s tests", group, len(tests)) group_counters, group_test_times, log_paths = self.try_run_test_group( - repo_path, group, tests, MAX_RETRY, NUM_WORKERS + repo_path, group, tests, MAX_RETRY, NUM_WORKERS, 0 ) total_tests = 0 for counter, value in group_counters.items(): diff --git a/tests/ci/jepsen_check.py b/tests/ci/jepsen_check.py index f91a3f080c0..772467d4245 100644 --- a/tests/ci/jepsen_check.py +++ b/tests/ci/jepsen_check.py @@ -9,16 +9,13 @@ from pathlib import Path from typing import Any, List import boto3 # type: ignore -import requests from build_download_helper import ( - download_build_with_progress, read_build_urls, ) from compress_files import compress_fast -from env_helper import REPO_COPY, REPORT_PATH, S3_BUILDS_BUCKET, S3_URL, TEMP_PATH +from env_helper import REPO_COPY, REPORT_PATH, TEMP_PATH from get_robot_token import get_parameter_from_ssm -from git_helper import git_runner from pr_info import PRInfo from report import FAILURE, SUCCESS, JobReport, TestResult, TestResults from ssh import SSHKey @@ -32,11 +29,10 @@ KEEPER_DESIRED_INSTANCE_COUNT = 3 SERVER_DESIRED_INSTANCE_COUNT = 4 KEEPER_IMAGE_NAME = "clickhouse/keeper-jepsen-test" -KEEPER_CHECK_NAME = "ClickHouse Keeper Jepsen" +KEEPER_CHECK_NAME = CI.JobNames.JEPSEN_KEEPER SERVER_IMAGE_NAME = "clickhouse/server-jepsen-test" -SERVER_CHECK_NAME = "ClickHouse Server Jepsen" - +SERVER_CHECK_NAME = CI.JobNames.JEPSEN_SERVER SUCCESSFUL_TESTS_ANCHOR = "# Successful tests" INTERMINATE_TESTS_ANCHOR = "# Indeterminate tests" @@ -201,36 +197,14 @@ def main(): # always use latest docker_image = KEEPER_IMAGE_NAME if args.program == "keeper" else SERVER_IMAGE_NAME - if pr_info.is_scheduled or pr_info.is_dispatched: - # get latest clickhouse by the static link for latest master buit - get its version and provide permanent url for this version to the jepsen - build_url = f"{S3_URL}/{S3_BUILDS_BUCKET}/master/amd64/clickhouse" - download_build_with_progress(build_url, Path(TEMP_PATH) / "clickhouse") - git_runner.run(f"chmod +x {TEMP_PATH}/clickhouse") - sha = git_runner.run( - f"{TEMP_PATH}/clickhouse local -q \"select value from system.build_options where name='GIT_HASH'\"" - ) - version_full = git_runner.run( - f'{TEMP_PATH}/clickhouse local -q "select version()"' - ) - version = ".".join(version_full.split(".")[0:2]) - assert len(sha) == 40, f"failed to fetch sha from the binary. result: {sha}" - assert ( - version - ), f"failed to fetch version from the binary. result: {version_full}" - build_url = ( - f"{S3_URL}/{S3_BUILDS_BUCKET}/{version}/{sha}/binary_release/clickhouse" - ) - print(f"Clickhouse version: [{version_full}], sha: [{sha}], url: [{build_url}]") - head = requests.head(build_url, timeout=60) - assert head.status_code == 200, f"Clickhouse binary not found: {build_url}" - else: - build_name = CI.get_required_build_name(check_name) - urls = read_build_urls(build_name, REPORT_PATH) - build_url = None - for url in urls: - if url.endswith("clickhouse"): - build_url = url - assert build_url, "No build url found in the report" + # binary_release assumed to be always ready on the master as it's part of the merge queue workflow + build_name = CI.get_required_build_name(check_name) + urls = read_build_urls(build_name, REPORT_PATH) + build_url = None + for url in urls: + if url.endswith("clickhouse"): + build_url = url + assert build_url, "No build url found in the report" extra_args = "" if args.program == "server": diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 59806a2a8fa..5c051b093e0 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -2,6 +2,7 @@ import json import logging import os +import re from typing import Dict, List, Set, Union from urllib.parse import quote @@ -328,7 +329,13 @@ class PRInfo: @property def is_release(self) -> bool: - return self.number == 0 and not self.is_merge_queue + return self.is_master or ( + self.is_push_event + and ( + bool(re.match(r"^2[1-9]\.[1-9][0-9]*$", self.head_ref)) + or bool(re.match(r"^release/2[1-9]\.[1-9][0-9]*$", self.head_ref)) + ) + ) @property def is_pr(self): @@ -337,6 +344,10 @@ class PRInfo: return True return False + @property + def is_push_event(self) -> bool: + return self.event_type == EventType.PUSH + @property def is_scheduled(self) -> bool: return self.event_type == EventType.SCHEDULE diff --git a/tests/ci/report.py b/tests/ci/report.py index 77043dfc8be..f50ed4c1f85 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -125,7 +125,7 @@ html {{ min-height: 100%; font-family: "DejaVu Sans", "Noto Sans", Arial, sans-s h1 {{ margin-left: 10px; }} th, td {{ padding: 5px 10px 5px 10px; text-align: left; vertical-align: top; line-height: 1.5; border: 1px solid var(--table-border-color); }} td {{ background: var(--td-background); }} -th {{ background: var(--th-background); }} +th {{ background: var(--th-background); white-space: nowrap; }} a {{ color: var(--link-color); text-decoration: none; }} a:hover, a:active {{ color: var(--link-hover-color); text-decoration: none; }} table {{ box-shadow: 0 8px 25px -5px rgba(0, 0, 0, var(--shadow-intensity)); border-collapse: collapse; border-spacing: 0; }} @@ -135,6 +135,7 @@ th {{ cursor: pointer; }} tr:hover {{ filter: var(--tr-hover-filter); }} .expandable {{ cursor: pointer; }} .expandable-content {{ display: none; }} +pre {{ white-space: pre-wrap; }} #fish {{ display: none; float: right; position: relative; top: -20em; right: 2vw; margin-bottom: -20em; width: 30vw; filter: brightness(7%); z-index: -1; }} .themes {{ diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index 44142050821..f376a129e6f 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -211,7 +211,7 @@ class TestCIConfig(unittest.TestCase): else: self.assertTrue( CI.get_job_ci_stage(job) - in (CI.WorkflowStages.TESTS_1, CI.WorkflowStages.TESTS_3), + in (CI.WorkflowStages.TESTS_1, CI.WorkflowStages.TESTS_2), msg=f"Stage for [{job}] is not correct", ) @@ -242,7 +242,7 @@ class TestCIConfig(unittest.TestCase): else: self.assertTrue( CI.get_job_ci_stage(job, non_blocking_ci=True) - in (CI.WorkflowStages.TESTS_1, CI.WorkflowStages.TESTS_2), + in (CI.WorkflowStages.TESTS_1, CI.WorkflowStages.TESTS_2_WW), msg=f"Stage for [{job}] is not correct", ) @@ -419,6 +419,32 @@ class TestCIConfig(unittest.TestCase): ] self.assertCountEqual(expected_jobs_to_do, actual_jobs_to_do) + def test_ci_py_for_specific_workflow(self): + """ + checks ci.py job configuration + """ + settings = CiSettings() + settings.no_ci_cache = True + pr_info = PRInfo(github_event=_TEST_EVENT_JSON) + # make it merge_queue + pr_info.event_type = EventType.SCHEDULE + assert pr_info.number == 0 and not pr_info.is_merge_queue and not pr_info.is_pr + ci_cache = CIPY._configure_jobs( + S3Helper(), + pr_info, + settings, + skip_jobs=False, + dry_run=True, + workflow_name=CI.WorkFlowNames.JEPSEN, + ) + actual_jobs_to_do = list(ci_cache.jobs_to_do) + expected_jobs_to_do = [ + CI.BuildNames.BINARY_RELEASE, + CI.JobNames.JEPSEN_KEEPER, + CI.JobNames.JEPSEN_SERVER, + ] + self.assertCountEqual(expected_jobs_to_do, actual_jobs_to_do) + def test_ci_py_await(self): """ checks ci.py job configuration @@ -452,6 +478,7 @@ class TestCIConfig(unittest.TestCase): pr_info = PRInfo(github_event=_TEST_EVENT_JSON) pr_info.event_type = EventType.PUSH pr_info.number = 0 + pr_info.head_ref = "24.12345" assert pr_info.is_release and not pr_info.is_merge_queue ci_cache = CIPY._configure_jobs( S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True diff --git a/tests/ci/worker/.gitignore b/tests/ci/worker/.gitignore deleted file mode 100644 index 4ed18989e78..00000000000 --- a/tests/ci/worker/.gitignore +++ /dev/null @@ -1 +0,0 @@ -generated_*init_runner.sh diff --git a/tests/ci/worker/deploy-runner-init.sh b/tests/ci/worker/deploy-runner-init.sh deleted file mode 100755 index 96fbd82a99c..00000000000 --- a/tests/ci/worker/deploy-runner-init.sh +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env bash - -set -e - -usage() { - echo "Usage: $0 ENVIRONMENT" >&2 - echo "Valid values for ENVIRONMENT: staging, production" >&2 - exit 1 -} - -case "$1" in - staging|production) - ENVIRONMENT="$1" ;; - --help) - usage ;; - *) - echo "Invalid argument" >&2 - usage ;; -esac - -cd "$(dirname "$0")" || exit 1 -SOURCE_SCRIPT='init_runner.sh' - -check_response() { - # Are we even in the interactive shell? - [ -t 1 ] || return 1 - local request - request="$1" - read -rp "$request (y/N): " response - case "$response" in - [Yy]) - return 0 - # Your code to continue goes here - ;; - *) - return 1 - ;; - esac -} - -check_dirty() { - if [ -n "$(git status --porcelain=v2 "$SOURCE_SCRIPT")" ]; then - echo "The $SOURCE_SCRIPT has uncommited changes, won't deploy it" >&2 - exit 1 - fi -} -GIT_HASH=$(git log -1 --format=format:%H) - -header() { - cat << EOF -#!/usr/bin/env bash - -echo 'The $ENVIRONMENT script is generated from $SOURCE_SCRIPT, commit $GIT_HASH' - -EOF -} - -body() { - local first_line - first_line=$(sed -n '/^# THE SCRIPT START$/{=;q;}' "$SOURCE_SCRIPT") - if [ -z "$first_line" ]; then - echo "The pattern '# THE SCRIPT START' is not found in $SOURCE_SCRIPT" >&2 - exit 1 - fi - tail "+$first_line" "$SOURCE_SCRIPT" -} - -GENERATED_FILE="generated_${ENVIRONMENT}_${SOURCE_SCRIPT}" - -{ header && body; } > "$GENERATED_FILE" - -echo "The file $GENERATED_FILE is generated" - -if check_response "Display the content of $GENERATED_FILE?"; then - if [ -z "$PAGER" ]; then - less "$GENERATED_FILE" - else - $PAGER "$GENERATED_FILE" - fi -fi - -check_dirty - -S3_OBJECT=${S3_OBJECT:-s3://github-runners-data/cloud-init/${ENVIRONMENT}.sh} -if check_response "Deploy the generated script to $S3_OBJECT?"; then - aws s3 mv "$GENERATED_FILE" "$S3_OBJECT" -fi diff --git a/tests/ci/worker/init_runner.sh b/tests/ci/worker/init_runner.sh deleted file mode 100644 index d6cdb6d9c57..00000000000 --- a/tests/ci/worker/init_runner.sh +++ /dev/null @@ -1,402 +0,0 @@ -#!/usr/bin/env bash - -cat > /dev/null << 'EOF' -The following content is embedded into the s3 object via the script -deploy-runner-init.sh {staging,production} -with additional helping information - -In the `user data` you should define as the following text -between `### COPY BELOW` and `### COPY ABOVE` - -### COPY BELOW -Content-Type: multipart/mixed; boundary="//" -MIME-Version: 1.0 - ---// -Content-Type: text/cloud-config; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit -Content-Disposition: attachment; filename="cloud-config.txt" - -#cloud-config -cloud_final_modules: -- [scripts-user, always] - ---// -Content-Type: text/x-shellscript; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit -Content-Disposition: attachment; filename="userdata.txt" - -#!/bin/bash -INSTANCE_ID=$(ec2metadata --instance-id) -INIT_ENVIRONMENT=$(/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='github:init-environment'].Value" --output text) -echo "Downloading and using $INIT_ENVIRONMENT cloud-init.sh" -aws s3 cp "s3://github-runners-data/cloud-init/${INIT_ENVIRONMENT:-production}.sh" /tmp/cloud-init.sh -chmod 0700 /tmp/cloud-init.sh -exec bash /tmp/cloud-init.sh ---// -### COPY ABOVE -EOF - -# THE SCRIPT START - -set -uo pipefail - -#################################### -# IMPORTANT! # -# EC2 instance should have # -# `github:runner-type` tag # -# set accordingly to a runner role # -#################################### - -echo "Running init v1" -export DEBIAN_FRONTEND=noninteractive -export RUNNER_HOME=/home/ubuntu/actions-runner - -export RUNNER_ORG="ClickHouse" -export RUNNER_URL="https://github.com/${RUNNER_ORG}" -# Funny fact, but metadata service has fixed IP -INSTANCE_ID=$(ec2metadata --instance-id) -export INSTANCE_ID - -bash /usr/local/share/scripts/init-network.sh - -# combine labels -RUNNER_TYPE=$(/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='github:runner-type'].Value" --output text) -LABELS="self-hosted,Linux,$(uname -m),$RUNNER_TYPE" -export LABELS - -# Refresh CloudWatch agent config -aws ssm get-parameter --region us-east-1 --name AmazonCloudWatch-github-runners --query 'Parameter.Value' --output text > /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json -systemctl restart amazon-cloudwatch-agent.service - -# Refresh teams ssh keys -TEAM_KEYS_URL=$(aws ssm get-parameter --region us-east-1 --name team-keys-url --query 'Parameter.Value' --output=text) -curl -s "${TEAM_KEYS_URL}" > /home/ubuntu/.ssh/authorized_keys2 -chown ubuntu: /home/ubuntu/.ssh -R - - -# Create a pre-run script that will provide diagnostics info -mkdir -p /tmp/actions-hooks -cat > /tmp/actions-hooks/common.sh << 'EOF' -#!/bin/bash -EOF - -terminate_delayed() { - # The function for post hook to gracefully finish the job and then tear down - # The very specific sleep time is used later to determine in the main loop if - # the instance is tearing down - # IF `sleep` IS CHANGED, CHANGE ANOTHER VALUE IN `pgrep` - sleep=13.14159265358979323846 - echo "Going to terminate the runner's instance in $sleep seconds" - # We execute it with `at` to not have it as an orphan process, but launched independently - # GH Runners kill all remain processes - echo "sleep '$sleep'; aws ec2 terminate-instances --instance-ids $INSTANCE_ID" | at now || \ - aws ec2 terminate-instances --instance-ids "$INSTANCE_ID" # workaround for complete out of space or non-installed `at` - exit 0 -} - -detect_delayed_termination() { - # The function look for very specific sleep with pi - if pgrep 'sleep 13.14159265358979323846'; then - echo 'The instance has delayed termination, sleep the same time to wait if it goes down' - sleep 14 - fi -} - -declare -f terminate_delayed >> /tmp/actions-hooks/common.sh - -terminate_and_exit() { - # Terminate instance and exit from the script instantly - echo "Going to terminate the runner's instance" - aws ec2 terminate-instances --instance-ids "$INSTANCE_ID" - exit 0 -} - -terminate_decrease_and_exit() { - # Terminate instance and exit from the script instantly - echo "Going to terminate the runner's instance and decrease asg capacity" - aws autoscaling terminate-instance-in-auto-scaling-group --instance-id "$INSTANCE_ID" --should-decrement-desired-capacity - exit 0 -} - -declare -f terminate_and_exit >> /tmp/actions-hooks/common.sh - -check_spot_instance_is_old() { - # This function should be executed ONLY BETWEEN runnings. - # It's unsafe to execute while the runner is working! - local LIFE_CYCLE - LIFE_CYCLE=$(curl -s --fail http://169.254.169.254/latest/meta-data/instance-life-cycle) - if [ "$LIFE_CYCLE" == "spot" ]; then - local UPTIME - UPTIME=$(< /proc/uptime) - UPTIME=${UPTIME%%.*} - if (( 3600 < UPTIME )); then - echo "The spot instance has uptime $UPTIME, it's time to shut it down" - return 0 - fi - fi - return 1 -} - -check_proceed_spot_termination() { - # The function checks and proceeds spot instance termination if exists - # The event for spot instance termination - local FORCE - FORCE=${1:-} - if TERMINATION_DATA=$(curl -s --fail http://169.254.169.254/latest/meta-data/spot/instance-action); then - # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-instance-termination-notices.html#instance-action-metadata - _action=$(jq '.action' -r <<< "$TERMINATION_DATA") - _time=$(jq '.time | fromdate' <<< "$TERMINATION_DATA") - _until_action=$((_time - $(date +%s))) - echo "Received the '$_action' event that will be effective in $_until_action seconds" - if (( _until_action <= 30 )) || [ "$FORCE" == "force" ]; then - echo "The action $_action will be done in $_until_action, killing the runner and exit" - local runner_pid - runner_pid=$(pgrep Runner.Listener) - if [ -n "$runner_pid" ]; then - # Kill the runner to not allow it cancelling the job - # shellcheck disable=SC2046 - kill -9 "$runner_pid" $(list_children "$runner_pid") - fi - sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" - terminate_and_exit - fi - fi -} - -no_terminating_metadata() { - # The function check that instance could continue work - # Returns 1 if any of termination events are received - - # The event for rebalance recommendation. Not strict, so we have some room to make a decision here - if curl -s --fail http://169.254.169.254/latest/meta-data/events/recommendations/rebalance; then - echo 'Received recommendation to rebalance, checking the uptime' - local UPTIME - UPTIME=$(< /proc/uptime) - UPTIME=${UPTIME%%.*} - # We don't shutdown the instances younger than 30m - if (( 1800 < UPTIME )); then - # To not shutdown everything at once, use the 66% to survive - if (( $((RANDOM % 3)) == 0 )); then - echo 'The instance is older than 30m and won the roulette' - return 1 - fi - echo 'The instance is older than 30m, but is not chosen for rebalance' - else - echo 'The instance is younger than 30m, do not shut it down' - fi - fi - - # Checks if the ASG in a lifecycle hook state - local ASG_STATUS - ASG_STATUS=$(curl -s http://169.254.169.254/latest/meta-data/autoscaling/target-lifecycle-state) - if [ "$ASG_STATUS" == "Terminated" ]; then - echo 'The instance in ASG status Terminating:Wait' - return 1 - fi -} - -terminate_on_event() { - # If there is a rebalance event, then the instance could die soon - # Let's don't wait for it and terminate proactively - if curl -s --fail http://169.254.169.254/latest/meta-data/events/recommendations/rebalance; then - terminate_and_exit - fi - - # Here we check if the autoscaling group marked the instance for termination, and it's wait for the job to finish - ASG_STATUS=$(curl -s http://169.254.169.254/latest/meta-data/autoscaling/target-lifecycle-state) - if [ "$ASG_STATUS" == "Terminated" ]; then - INSTANCE_ID=$(ec2metadata --instance-id) - ASG_NAME=$(aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='aws:autoscaling:groupName'].Value" --output text) - LIFECYCLE_HOOKS=$(aws autoscaling describe-lifecycle-hooks --auto-scaling-group-name "$ASG_NAME" --query "LifecycleHooks[].LifecycleHookName" --output text) - for LCH in $LIFECYCLE_HOOKS; do - aws autoscaling complete-lifecycle-action --lifecycle-action-result CONTINUE \ - --lifecycle-hook-name "$LCH" --auto-scaling-group-name "$ASG_NAME" \ - --instance-id "$INSTANCE_ID" - true # autoformat issue - done - echo 'The runner is marked as "Terminated" by the autoscaling group, we are terminating' - terminate_and_exit - fi -} - -cat > /tmp/actions-hooks/pre-run.sh << EOF -#!/bin/bash -set -uo pipefail - -echo "Runner's public DNS: $(ec2metadata --public-hostname)" -echo "Runner's labels: ${LABELS}" -echo "Runner's instance type: $(ec2metadata --instance-type)" -EOF - -# Create a post-run script that will restart docker daemon before the job started -cat > /tmp/actions-hooks/post-run.sh << 'EOF' -#!/bin/bash -set -xuo pipefail - -source /tmp/actions-hooks/common.sh - -# Free KiB, free percents -ROOT_STAT=($(df / | awk '/\// {print $4 " " int($4/$2 * 100)}')) -if [[ ${ROOT_STAT[0]} -lt 3000000 ]] || [[ ${ROOT_STAT[1]} -lt 5 ]]; then - echo "The runner has ${ROOT_STAT[0]}KiB and ${ROOT_STAT[1]}% of free space on /" - terminate_delayed -fi - -# shellcheck disable=SC2046 -docker ps --quiet | xargs --no-run-if-empty docker kill ||: -# shellcheck disable=SC2046 -docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - -# If we have hanged containers after the previous commands, than we have a hanged one -# and should restart the daemon -if [ "$(docker ps --all --quiet)" ]; then - # Systemd service of docker has StartLimitBurst=3 and StartLimitInterval=60s, - # that's why we try restarting it for long - for i in {1..25}; - do - sudo systemctl restart docker && break || sleep 5 - done - - for i in {1..10} - do - docker info && break || sleep 2 - done - # Last chance, otherwise we have to terminate poor instance - docker info 1>/dev/null || { echo Docker unable to start; terminate_delayed ; } -fi -EOF - -get_runner_token() { - /usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value -} - -is_job_assigned() { - local runner_pid - runner_pid=$(pgrep Runner.Listener) - if [ -z "$runner_pid" ]; then - # if runner has finished, it's fine - return 0 - fi - local log_file - log_file=$(lsof -p "$runner_pid" 2>/dev/null | grep -o "$RUNNER_HOME/_diag/Runner.*log") - if [ -z "$log_file" ]; then - # assume, the process is over or just started - return 0 - fi - # So far it's the only solid way to determine that the job is starting - grep -q 'Terminal] .* Running job:' "$log_file" \ - && return 0 \ - || return 1 -} - -list_children () { - local children - children=$(ps --ppid "$1" -o pid=) - if [ -z "$children" ]; then - return - fi - - for pid in $children; do - list_children "$pid" - done - echo "$children" -} - -# There's possibility that it fails because the runner's version is outdated, -# so after the first failure we'll try to launch it with enabled autoupdate. -# -# We'll fail and terminate after 10 consequent failures. -ATTEMPT=0 -# In `kill` 0 means "all processes in process group", -1 is "all but PID 1" -# We use `-2` to get an error -RUNNER_PID=-2 - -while true; do - # Does not send signal, but checks that the process $RUNNER_PID is running - if kill -0 -- $RUNNER_PID; then - ATTEMPT=0 - echo "Runner is working with pid $RUNNER_PID, checking the metadata in background" - check_proceed_spot_termination - - if ! is_job_assigned; then - RUNNER_AGE=$(( $(date +%s) - $(stat -c +%Y /proc/"$RUNNER_PID" 2>/dev/null || date +%s) )) - echo "The runner is launched $RUNNER_AGE seconds ago and still hasn't received a job" - if (( 60 < RUNNER_AGE )); then - echo "Attempt to delete the runner for a graceful shutdown" - sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" \ - || continue - echo "Runner didn't launch or have assigned jobs after ${RUNNER_AGE} seconds, shutting down" - terminate_decrease_and_exit - fi - fi - else - if [ "$RUNNER_PID" != "-2" ]; then - wait $RUNNER_PID \ - && echo "Runner with PID $RUNNER_PID successfully finished" \ - || echo "Attempt $((++ATTEMPT)) to start the runner" - fi - if (( ATTEMPT > 10 )); then - echo "The runner has failed to start after $ATTEMPT attempt. Give up and terminate it" - terminate_and_exit - fi - - cd $RUNNER_HOME || terminate_and_exit - detect_delayed_termination - # If runner is not active, check that it needs to terminate itself - echo "Checking if the instance suppose to terminate" - no_terminating_metadata || terminate_on_event - check_spot_instance_is_old && terminate_and_exit - check_proceed_spot_termination force - - echo "Going to configure runner" - token_args=(--token "$(get_runner_token)") - config_args=( - "${token_args[@]}" --url "$RUNNER_URL" - --ephemeral --unattended --replace --runnergroup Default - --labels "$LABELS" --work _work --name "$INSTANCE_ID" - ) - if (( ATTEMPT > 1 )); then - echo 'The runner failed to start at least once. Removing it and then configuring with autoupdate enabled.' - sudo -u ubuntu ./config.sh remove "${token_args[@]}" - sudo -u ubuntu ./config.sh "${config_args[@]}" - else - echo "Configure runner with disabled autoupdate" - config_args+=("--disableupdate") - sudo -u ubuntu ./config.sh "${config_args[@]}" - fi - - echo "Another one check to avoid race between runner and infrastructure" - no_terminating_metadata || terminate_on_event - check_spot_instance_is_old && terminate_and_exit - check_proceed_spot_termination force - - # There were some failures to start the Job because of trash in _work - rm -rf _work - - # https://github.com/actions/runner/issues/3266 - # We're unable to know if the runner is failed to start. - echo 'Monkey-patching run helpers to get genuine exit code of the runner' - for script in run.sh run-helper.sh.template; do - # shellcheck disable=SC2016 - grep -q 'exit 0$' "$script" && \ - sed 's/exit 0/exit $returnCode/' -i "$script" && \ - echo "Script $script is patched" - done - - echo "Run" - sudo -u ubuntu \ - ACTIONS_RUNNER_HOOK_JOB_STARTED=/tmp/actions-hooks/pre-run.sh \ - ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/tmp/actions-hooks/post-run.sh \ - ./run.sh & - RUNNER_PID=$! - - sleep 10 - fi - - sleep 5 -done - -# vim:ts=4:sw=4 diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 0c04d8fb2c3..a29c786e998 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -835,6 +835,7 @@ class SettingsRandomizer: "cross_join_min_bytes_to_compress": lambda: random.choice([0, 1, 100000000]), "min_external_table_block_size_bytes": lambda: random.choice([0, 1, 100000000]), "max_parsing_threads": lambda: random.choice([0, 1, 10]), + "optimize_functions_to_subcolumns": lambda: random.randint(0, 1), } @staticmethod diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 548b58a17e8..7d80fbe90f8 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -52,6 +52,7 @@ from helpers.client import QueryRuntimeException import docker from .client import Client +from .retry_decorator import retry from .config_cluster import * @@ -2690,15 +2691,12 @@ class ClickHouseCluster: images_pull_cmd = self.base_cmd + ["pull"] # sometimes dockerhub/proxy can be flaky - for i in range(5): - try: - run_and_check(images_pull_cmd) - break - except Exception as ex: - if i == 4: - raise ex - logging.info("Got exception pulling images: %s", ex) - time.sleep(i * 3) + + retry( + log_function=lambda exception: logging.info( + "Got exception pulling images: %s", exception + ), + )(run_and_check)(images_pull_cmd) if self.with_zookeeper_secure and self.base_zookeeper_cmd: logging.debug("Setup ZooKeeper Secure") @@ -2971,7 +2969,11 @@ class ClickHouseCluster: "Trying to create Azurite instance by command %s", " ".join(map(str, azurite_start_cmd)), ) - run_and_check(azurite_start_cmd) + retry( + log_function=lambda exception: logging.info( + f"Azurite initialization failed with error: {exception}" + ), + )(run_and_check)(azurite_start_cmd) self.up_called = True logging.info("Trying to connect to Azurite") self.wait_azurite_to_start() diff --git a/tests/integration/helpers/retry_decorator.py b/tests/integration/helpers/retry_decorator.py new file mode 100644 index 00000000000..aaa040464c2 --- /dev/null +++ b/tests/integration/helpers/retry_decorator.py @@ -0,0 +1,36 @@ +import time +import random +from typing import Type, List + + +def retry( + retries: int = 5, + delay: float = 1, + backoff: float = 1.5, + jitter: float = 2, + log_function=lambda *args, **kwargs: None, + retriable_expections_list: List[Type[BaseException]] = [Exception], +): + def inner(func): + def wrapper(*args, **kwargs): + current_delay = delay + for retry in range(retries): + try: + func(*args, **kwargs) + break + except Exception as e: + should_retry = False + for retriable_exception in retriable_expections_list: + if isinstance(e, retriable_exception): + should_retry = True + break + if not should_retry or (retry == retries - 1): + raise e + log_function(retry=retry, exception=e) + sleep_time = current_delay + random.uniform(0, jitter) + time.sleep(sleep_time) + current_delay *= backoff + + return wrapper + + return inner diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index 3c3d1b6cc96..99fa626bd1e 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -43,7 +43,6 @@ "test_replicated_database/test.py::test_startup_without_zk", "test_replicated_database/test.py::test_sync_replica", "test_replicated_fetches_timeouts/test.py::test_no_stall", - "test_storage_kafka/test.py::test_kafka_no_holes_when_write_suffix_failed", "test_storage_s3/test.py::test_url_reconnect_in_the_middle", "test_system_metrics/test.py::test_readonly_metrics", "test_system_replicated_fetches/test.py::test_system_replicated_fetches", @@ -73,7 +72,7 @@ "test_keeper_map/test.py::test_keeper_map_without_zk", "test_replicated_merge_tree_wait_on_shutdown/test.py::test_shutdown_and_wait", - + "test_http_failover/test.py::test_url_destination_host_with_multiple_addrs", "test_http_failover/test.py::test_url_invalid_hostname", "test_http_failover/test.py::test_url_ip_change", @@ -97,5 +96,75 @@ "test_ttl_move/test.py::TestCancelBackgroundMoving::test_cancel_background_moving_on_stop_moves_query", "test_ttl_move/test.py::TestCancelBackgroundMoving::test_cancel_background_moving_on_table_detach", - "test_ttl_move/test.py::TestCancelBackgroundMoving::test_cancel_background_moving_on_zookeeper_disconnect" + "test_ttl_move/test.py::TestCancelBackgroundMoving::test_cancel_background_moving_on_zookeeper_disconnect", + + "test_storage_kafka/test.py::test_kafka_column_types", + "test_storage_kafka/test.py::test_kafka_settings_old_syntax", + "test_storage_kafka/test.py::test_kafka_settings_new_syntax", + "test_storage_kafka/test.py::test_kafka_settings_predefined_macros", + "test_storage_kafka/test.py::test_kafka_json_as_string", + "test_storage_kafka/test.py::test_kafka_formats", + "test_storage_kafka/test.py::test_kafka_issue11308", + "test_storage_kafka/test.py::test_kafka_issue4116", + "test_storage_kafka/test.py::test_kafka_consumer_hang", + "test_storage_kafka/test.py::test_kafka_consumer_hang2", + "test_storage_kafka/test.py::test_kafka_read_consumers_in_parallel", + "test_storage_kafka/test.py::test_kafka_csv_with_delimiter", + "test_storage_kafka/test.py::test_kafka_tsv_with_delimiter", + "test_storage_kafka/test.py::test_kafka_select_empty", + "test_storage_kafka/test.py::test_kafka_json_without_delimiter", + "test_storage_kafka/test.py::test_kafka_protobuf", + "test_storage_kafka/test.py::test_kafka_string_field_on_first_position_in_protobuf", + "test_storage_kafka/test.py::test_kafka_protobuf_no_delimiter", + "test_storage_kafka/test.py::test_kafka_materialized_view", + "test_storage_kafka/test.py::test_kafka_recreate_kafka_table", + "test_storage_kafka/test.py::test_librdkafka_compression", + "test_storage_kafka/test.py::test_kafka_materialized_view_with_subquery", + "test_storage_kafka/test.py::test_kafka_many_materialized_views", + "test_storage_kafka/test.py::test_kafka_flush_on_big_message", + "test_storage_kafka/test.py::test_kafka_virtual_columns", + "test_storage_kafka/test.py::test_kafka_virtual_columns_with_materialized_view", + "test_storage_kafka/test.py::test_kafka_insert", + "test_storage_kafka/test.py::test_kafka_produce_consume", + "test_storage_kafka/test.py::test_kafka_commit_on_block_write", + "test_storage_kafka/test.py::test_kafka_virtual_columns2", + "test_storage_kafka/test.py::test_kafka_producer_consumer_separate_settings", + "test_storage_kafka/test.py::test_kafka_produce_key_timestamp", + "test_storage_kafka/test.py::test_kafka_insert_avro", + "test_storage_kafka/test.py::test_kafka_produce_consume_avro", + "test_storage_kafka/test.py::test_kafka_flush_by_time", + "test_storage_kafka/test.py::test_kafka_flush_by_block_size", + "test_storage_kafka/test.py::test_kafka_lot_of_partitions_partial_commit_of_bulk", + "test_storage_kafka/test.py::test_kafka_rebalance", + "test_storage_kafka/test.py::test_kafka_no_holes_when_write_suffix_failed", + "test_storage_kafka/test.py::test_exception_from_destructor", + "test_storage_kafka/test.py::test_commits_of_unprocessed_messages_on_drop", + "test_storage_kafka/test.py::test_bad_reschedule", + "test_storage_kafka/test.py::test_kafka_duplicates_when_commit_failed", + "test_storage_kafka/test.py::test_premature_flush_on_eof", + "test_storage_kafka/test.py::test_kafka_unavailable", + "test_storage_kafka/test.py::test_kafka_issue14202", + "test_storage_kafka/test.py::test_kafka_csv_with_thread_per_consumer", + "test_storage_kafka/test.py::test_kafka_engine_put_errors_to_stream", + "test_storage_kafka/test.py::test_kafka_engine_put_errors_to_stream_with_random_malformed_json", + "test_storage_kafka/test.py::test_kafka_formats_with_broken_message", + "test_storage_kafka/test.py::test_kafka_consumer_failover", + "test_storage_kafka/test.py::test_kafka_predefined_configuration", + "test_storage_kafka/test.py::test_issue26643", + "test_storage_kafka/test.py::test_num_consumers_limit", + "test_storage_kafka/test.py::test_format_with_prefix_and_suffix", + "test_storage_kafka/test.py::test_max_rows_per_message", + "test_storage_kafka/test.py::test_row_based_formats", + "test_storage_kafka/test.py::test_block_based_formats_1", + "test_storage_kafka/test.py::test_block_based_formats_2", + "test_storage_kafka/test.py::test_system_kafka_consumers", + "test_storage_kafka/test.py::test_system_kafka_consumers_rebalance", + "test_storage_kafka/test.py::test_system_kafka_consumers_rebalance_mv", + "test_storage_kafka/test.py::test_formats_errors", + "test_storage_kafka/test.py::test_multiple_read_in_materialized_views", + + "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string", + "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string_request_new_ticket_after_expiration", + "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string_no_kdc", + "test_storage_kerberized_kafka/test.py::test_kafka_config_from_sql_named_collection" ] diff --git a/tests/integration/runner b/tests/integration/runner index a583d7fe897..0667541b196 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -243,6 +243,10 @@ if __name__ == "__main__": "-n", "--parallel", action="store", dest="parallel", help="Parallelism" ) + parser.add_argument( + "--count", action="store", type=int, dest="count", help="Repeat count" + ) + parser.add_argument( "--no-random", action="store", @@ -318,6 +322,10 @@ if __name__ == "__main__": parallel_args += "--dist=loadfile" parallel_args += f" -n {args.parallel}".format() + repeat_args = ( + f" --count {args.count}" if args.count is not None and args.count > 0 else "" + ) + rand_args = "" # if not args.no_random: # rand_args += f"--random-seed={os.getpid()}" @@ -409,7 +417,7 @@ if __name__ == "__main__": f"--volume={args.utils_dir}/grpc-client/pb2:/ClickHouse/utils/grpc-client/pb2 " f"--volume=/run:/run/host:ro {dockerd_internal_volume} {env_tags} {env_cleanup} " f"-e DOCKER_CLIENT_TIMEOUT=300 -e COMPOSE_HTTP_TIMEOUT=600 {use_old_analyzer} -e PYTHONUNBUFFERED=1 " - f'-e PYTEST_ADDOPTS="{parallel_args} {pytest_opts} {tests_list} {rand_args} -vvv"' + f'-e PYTEST_ADDOPTS="{parallel_args} {repeat_args} {pytest_opts} {tests_list} {rand_args} -vvv"' f" {DIND_INTEGRATION_TESTS_IMAGE_NAME}:{args.docker_image_version}" ) diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index d8662fad011..4806625f3f0 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -1,9 +1,10 @@ -import pytest import glob -import re -import random import os.path +import pytest +import random +import re import sys +import uuid from collections import namedtuple from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_eq_with_retry, TSV @@ -538,7 +539,8 @@ def test_backup_not_found_or_already_exists(): def test_file_engine(): - backup_name = f"File('/backups/file/')" + id = uuid.uuid4() + backup_name = f"File('/backups/file/{id}/')" create_and_fill_table() assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" @@ -549,6 +551,7 @@ def test_file_engine(): instance.query(f"RESTORE TABLE test.table FROM {backup_name}") assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" + instance.query("DROP TABLE test.table") def test_database(): @@ -565,7 +568,8 @@ def test_database(): def test_zip_archive(): - backup_name = f"Disk('backups', 'archive.zip')" + id = uuid.uuid4() + backup_name = f"Disk('backups', 'archive_{id}.zip')" create_and_fill_table() assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" @@ -578,10 +582,12 @@ def test_zip_archive(): instance.query(f"RESTORE TABLE test.table FROM {backup_name}") assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" + instance.query("DROP TABLE test.table") def test_zip_archive_with_settings(): - backup_name = f"Disk('backups', 'archive_with_settings.zip')" + id = uuid.uuid4() + backup_name = f"Disk('backups', 'archive_with_settings_{id}.zip')" create_and_fill_table() assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" @@ -596,10 +602,12 @@ def test_zip_archive_with_settings(): f"RESTORE TABLE test.table FROM {backup_name} SETTINGS password='qwerty'" ) assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" + instance.query("DROP TABLE test.table") def test_zip_archive_with_bad_compression_method(): - backup_name = f"Disk('backups', 'archive_with_bad_compression_method.zip')" + id = uuid.uuid4() + backup_name = f"Disk('backups', 'archive_with_bad_compression_method_{id}.zip')" create_and_fill_table() assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" @@ -617,7 +625,8 @@ def test_zip_archive_with_bad_compression_method(): def test_tar_archive(): - backup_name = f"Disk('backups', 'archive.tar')" + id = uuid.uuid4() + backup_name = f"Disk('backups', 'archive_{id}.tar')" create_and_fill_table() assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" @@ -630,10 +639,12 @@ def test_tar_archive(): instance.query(f"RESTORE TABLE test.table FROM {backup_name}") assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" + instance.query("DROP TABLE test.table") def test_tar_bz2_archive(): - backup_name = f"Disk('backups', 'archive.tar.bz2')" + id = uuid.uuid4() + backup_name = f"Disk('backups', 'archive_{id}.tar.bz2')" create_and_fill_table() assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" @@ -649,7 +660,8 @@ def test_tar_bz2_archive(): def test_tar_gz_archive(): - backup_name = f"Disk('backups', 'archive.tar.gz')" + id = uuid.uuid4() + backup_name = f"Disk('backups', 'archive_{id}.tar.gz')" create_and_fill_table() assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" @@ -665,7 +677,8 @@ def test_tar_gz_archive(): def test_tar_lzma_archive(): - backup_name = f"Disk('backups', 'archive.tar.lzma')" + id = uuid.uuid4() + backup_name = f"Disk('backups', 'archive_{id}.tar.lzma')" create_and_fill_table() assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" @@ -681,7 +694,8 @@ def test_tar_lzma_archive(): def test_tar_zst_archive(): - backup_name = f"Disk('backups', 'archive.tar.zst')" + id = uuid.uuid4() + backup_name = f"Disk('backups', 'archive_{id}.tar.zst')" create_and_fill_table() assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" @@ -697,7 +711,8 @@ def test_tar_zst_archive(): def test_tar_xz_archive(): - backup_name = f"Disk('backups', 'archive.tar.xz')" + id = uuid.uuid4() + backup_name = f"Disk('backups', 'archive_{id}.tar.xz')" create_and_fill_table() assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" @@ -713,7 +728,8 @@ def test_tar_xz_archive(): def test_tar_archive_with_password(): - backup_name = f"Disk('backups', 'archive_with_password.tar')" + id = uuid.uuid4() + backup_name = f"Disk('backups', 'archive_with_password_{id}.tar')" create_and_fill_table() assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" @@ -731,7 +747,8 @@ def test_tar_archive_with_password(): def test_tar_archive_with_bad_compression_method(): - backup_name = f"Disk('backups', 'archive_with_bad_compression_method.tar')" + id = uuid.uuid4() + backup_name = f"Disk('backups', 'archive_with_bad_compression_method_{id}.tar')" create_and_fill_table() assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" @@ -1220,6 +1237,10 @@ def test_system_users_required_privileges(): assert instance.query("SHOW CREATE ROLE r1") == "CREATE ROLE r1\n" assert instance.query("SHOW GRANTS FOR r1") == "" + instance.query("DROP USER u1") + instance.query("DROP ROLE r1") + instance.query("DROP USER u2") + def test_system_users_async(): instance.query("CREATE USER u1 IDENTIFIED BY 'qwe123' SETTINGS custom_c = 3") @@ -1412,6 +1433,8 @@ def test_system_functions(): assert instance.query("SELECT number, parity_str(number) FROM numbers(3)") == TSV( [[0, "even"], [1, "odd"], [2, "even"]] ) + instance.query("DROP FUNCTION linear_equation") + instance.query("DROP FUNCTION parity_str") def test_backup_partition(): diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index 700ed6f15f5..1b7f4aaa97d 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -776,7 +776,6 @@ def test_system_users(): def test_system_functions(): node1.query("CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b;") - node1.query("CREATE FUNCTION parity_str AS (n) -> if(n % 2, 'odd', 'even');") backup_name = new_backup_name() @@ -817,6 +816,9 @@ def test_system_functions(): [[0, "even"], [1, "odd"], [2, "even"]] ) + node1.query("DROP FUNCTION linear_equation") + node1.query("DROP FUNCTION parity_str") + def test_projection(): node1.query( diff --git a/tests/integration/test_backup_restore_on_cluster/test_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_concurrency.py index c08f3c9c242..0e381d48fb1 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_concurrency.py @@ -276,15 +276,10 @@ def test_create_or_drop_tables_during_backup(db_engine, table_engine): for node in nodes: assert_eq_with_retry( node, - f"SELECT status from system.backups WHERE id IN {ids_list} AND (status == 'CREATING_BACKUP')", - "", - ) - - for node in nodes: - assert_eq_with_retry( - node, - f"SELECT status, error from system.backups WHERE id IN {ids_list} AND (status == 'BACKUP_FAILED')", + f"SELECT status, error from system.backups " + f"WHERE id IN {ids_list} AND ((status == 'CREATING_BACKUP') OR (status == 'BACKUP_FAILED'))", "", + retry_count=100, ) backup_names = {} diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index cd0f2032559..f3b6993c4fd 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -9,13 +9,13 @@ import re cluster = ClickHouseCluster(__file__) -num_nodes = 10 +num_nodes = 2 def generate_cluster_def(): path = os.path.join( os.path.dirname(os.path.realpath(__file__)), - "./_gen/cluster_for_concurrency_test.xml", + "./_gen/cluster_for_test_disallow_concurrency.xml", ) os.makedirs(os.path.dirname(path), exist_ok=True) with open(path, "w") as f: @@ -111,22 +111,56 @@ def create_and_fill_table(): nodes[i].query(f"INSERT INTO tbl SELECT number FROM numbers(40000000)") -def wait_for_fail_backup(node, backup_id, backup_name): +def get_status_and_error(node, backup_or_restore_id): + return ( + node.query( + f"SELECT status, error FROM system.backups WHERE id == '{backup_or_restore_id}'" + ) + .rstrip("\n") + .split("\t") + ) + + +def wait_for_backup(node, backup_id): + assert_eq_with_retry( + node, + f"SELECT status FROM system.backups WHERE id = '{backup_id}'", + "BACKUP_CREATED", + sleep_time=2, + retry_count=50, + ) + + +def wait_for_restore(node, restore_id): + assert_eq_with_retry( + node, + f"SELECT status FROM system.backups WHERE id == '{restore_id}'", + "RESTORED", + sleep_time=2, + retry_count=50, + ) + + +def check_backup_error(error): expected_errors = [ "Concurrent backups not supported", - f"Backup {backup_name} already exists", + "BACKUP_ALREADY_EXISTS", ] - status = node.query( - f"SELECT status FROM system.backups WHERE id == '{backup_id}'" - ).rstrip("\n") + assert any([expected_error in error for expected_error in expected_errors]) + + +def check_restore_error(error): + expected_errors = [ + "Concurrent restores not supported", + "Cannot restore the table default.tbl because it already contains some data", + ] + assert any([expected_error in error for expected_error in expected_errors]) + + +def wait_for_backup_failure(node, backup_id): + status, error = get_status_and_error(node, backup_id) # It is possible that the second backup was picked up first, and then the async backup - if status == "BACKUP_FAILED": - error = node.query( - f"SELECT error FROM system.backups WHERE id == '{backup_id}'" - ).rstrip("\n") - assert any([expected_error in error for expected_error in expected_errors]) - return - elif status == "CREATING_BACKUP": + if status == "CREATING_BACKUP": assert_eq_with_retry( node, f"SELECT status FROM system.backups WHERE id = '{backup_id}'", @@ -134,31 +168,17 @@ def wait_for_fail_backup(node, backup_id, backup_name): sleep_time=2, retry_count=50, ) - error = node.query( - f"SELECT error FROM system.backups WHERE id == '{backup_id}'" - ).rstrip("\n") - assert re.search(f"Backup {backup_name} already exists", error) - return + status, error = get_status_and_error(node, backup_id) + if status == "BACKUP_FAILED": + check_backup_error(error) else: assert False, "Concurrent backups both passed, when one is expected to fail" -def wait_for_fail_restore(node, restore_id): - expected_errors = [ - "Concurrent restores not supported", - "Cannot restore the table default.tbl because it already contains some data", - ] - status = node.query( - f"SELECT status FROM system.backups WHERE id == '{restore_id}'" - ).rstrip("\n") +def wait_for_restore_failure(node, restore_id): + status, error = get_status_and_error(node, restore_id) # It is possible that the second backup was picked up first, and then the async backup - if status == "RESTORE_FAILED": - error = node.query( - f"SELECT error FROM system.backups WHERE id == '{restore_id}'" - ).rstrip("\n") - assert any([expected_error in error for expected_error in expected_errors]) - return - elif status == "RESTORING": + if status == "RESTORING": assert_eq_with_retry( node, f"SELECT status FROM system.backups WHERE id = '{restore_id}'", @@ -166,14 +186,9 @@ def wait_for_fail_restore(node, restore_id): sleep_time=2, retry_count=50, ) - error = node.query( - f"SELECT error FROM system.backups WHERE id == '{restore_id}'" - ).rstrip("\n") - assert re.search( - "Cannot restore the table default.tbl because it already contains some data", - error, - ) - return + status, error = get_status_and_error(node, restore_id) + if status == "RESTORE_FAILED": + check_restore_error(error) else: assert False, "Concurrent restores both passed, when one is expected to fail" @@ -188,39 +203,28 @@ def test_concurrent_backups_on_same_node(): backup_name = new_backup_name() - id = ( + # Backup #1. + id, status = ( nodes[0] .query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name} ASYNC") - .split("\t")[0] + .rstrip("\n") + .split("\t") ) - status = ( - nodes[0] - .query(f"SELECT status FROM system.backups WHERE id == '{id}'") - .rstrip("\n") - ) assert status in ["CREATING_BACKUP", "BACKUP_CREATED"] - result, error = nodes[0].query_and_get_answer_with_error( + # Backup #2. + _, error = nodes[0].query_and_get_answer_with_error( f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" ) - expected_errors = [ - "Concurrent backups not supported", - f"Backup {backup_name} already exists", - ] - if not error: - wait_for_fail_backup(nodes[0], id, backup_name) - - assert any([expected_error in error for expected_error in expected_errors]) - - assert_eq_with_retry( - nodes[0], - f"SELECT status FROM system.backups WHERE id = '{id}'", - "BACKUP_CREATED", - sleep_time=2, - retry_count=50, - ) + if error: + # Backup #2 failed, backup #1 should be successful. + check_backup_error(error) + wait_for_backup(nodes[0], id) + else: + # Backup #2 was successful, backup #1 should fail. + wait_for_backup_failure(nodes[0], id) # This restore part is added to confirm creating an internal backup & restore work # even when a concurrent backup is stopped @@ -238,40 +242,38 @@ def test_concurrent_backups_on_different_nodes(): backup_name = new_backup_name() - id = ( - nodes[1] + # Backup #1. + id, status = ( + nodes[0] .query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name} ASYNC") - .split("\t")[0] + .rstrip("\n") + .split("\t") ) - status = ( - nodes[1] - .query(f"SELECT status FROM system.backups WHERE id == '{id}'") - .rstrip("\n") - ) assert status in ["CREATING_BACKUP", "BACKUP_CREATED"] - result, error = nodes[0].query_and_get_answer_with_error( + # Backup #2. + _, error = nodes[1].query_and_get_answer_with_error( f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" ) - expected_errors = [ - "Concurrent backups not supported", - f"Backup {backup_name} already exists", - ] + if error: + # Backup #2 failed, backup #1 should be successful. + check_backup_error(error) + wait_for_backup(nodes[0], id) + else: + # Backup #2 was successful, backup #1 should fail. + wait_for_backup_failure(nodes[0], id) - if not error: - wait_for_fail_backup(nodes[1], id, backup_name) - - assert any([expected_error in error for expected_error in expected_errors]) - - assert_eq_with_retry( - nodes[1], - f"SELECT status FROM system.backups WHERE id = '{id}'", - "BACKUP_CREATED", - sleep_time=2, - retry_count=50, + # This restore part is added to confirm creating an internal backup & restore work + # even when a concurrent backup is stopped + nodes[0].query( + f"DROP TABLE tbl ON CLUSTER 'cluster' SYNC", + settings={ + "distributed_ddl_task_timeout": 360, + }, ) + nodes[0].query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}") def test_concurrent_restores_on_same_node(): @@ -288,40 +290,28 @@ def test_concurrent_restores_on_same_node(): }, ) - restore_id = ( + # Restore #1. + restore_id, status = ( nodes[0] .query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC") - .split("\t")[0] + .rstrip("\n") + .split("\t") ) - status = ( - nodes[0] - .query(f"SELECT status FROM system.backups WHERE id == '{restore_id}'") - .rstrip("\n") - ) assert status in ["RESTORING", "RESTORED"] - result, error = nodes[0].query_and_get_answer_with_error( + # Restore #2. + _, error = nodes[0].query_and_get_answer_with_error( f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" ) - expected_errors = [ - "Concurrent restores not supported", - "Cannot restore the table default.tbl because it already contains some data", - ] - - if not error: - wait_for_fail_restore(nodes[0], restore_id) - - assert any([expected_error in error for expected_error in expected_errors]) - - assert_eq_with_retry( - nodes[0], - f"SELECT status FROM system.backups WHERE id == '{restore_id}'", - "RESTORED", - sleep_time=2, - retry_count=50, - ) + if error: + # Restore #2 failed, restore #1 should be successful. + check_restore_error(error) + wait_for_restore(nodes[0], restore_id) + else: + # Restore #2 was successful, restore #1 should fail. + wait_for_restore_failure(nodes[0], restore_id) def test_concurrent_restores_on_different_node(): @@ -338,37 +328,25 @@ def test_concurrent_restores_on_different_node(): }, ) - restore_id = ( + # Restore #1. + restore_id, status = ( nodes[0] .query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC") - .split("\t")[0] + .rstrip("\n") + .split("\t") ) - status = ( - nodes[0] - .query(f"SELECT status FROM system.backups WHERE id == '{restore_id}'") - .rstrip("\n") - ) assert status in ["RESTORING", "RESTORED"] - result, error = nodes[1].query_and_get_answer_with_error( + # Restore #2. + _, error = nodes[1].query_and_get_answer_with_error( f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" ) - expected_errors = [ - "Concurrent restores not supported", - "Cannot restore the table default.tbl because it already contains some data", - ] - - if not error: - wait_for_fail_restore(nodes[0], restore_id) - - assert any([expected_error in error for expected_error in expected_errors]) - - assert_eq_with_retry( - nodes[0], - f"SELECT status FROM system.backups WHERE id == '{restore_id}'", - "RESTORED", - sleep_time=2, - retry_count=50, - ) + if error: + # Restore #2 failed, restore #1 should be successful. + check_restore_error(error) + wait_for_restore(nodes[0], restore_id) + else: + # Restore #2 was successful, restore #1 should fail. + wait_for_restore_failure(nodes[0], restore_id) diff --git a/tests/integration/test_backup_restore_s3/configs/disk_s3.xml b/tests/integration/test_backup_restore_s3/configs/disk_s3.xml index 45a1e17b039..7ac1a052c30 100644 --- a/tests/integration/test_backup_restore_s3/configs/disk_s3.xml +++ b/tests/integration/test_backup_restore_s3/configs/disk_s3.xml @@ -21,6 +21,13 @@ minio123 33554432 + + s3_plain_rewritable + http://minio1:9001/root/data/disks/disk_s3_plain_rewritable/ + minio + minio123 + 33554432 + cache disk_s3 @@ -37,6 +44,20 @@ + + +

+ disk_s3_plain +
+ + + + +
+ disk_s3_plain_rewritable +
+
+
@@ -57,6 +78,7 @@ default disk_s3 disk_s3_plain + disk_s3_plain_rewritable disk_s3_cache disk_s3_other_bucket diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index d53335000a6..381268ce7fe 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -254,6 +254,7 @@ def check_system_tables(backup_query_id=None): ("disk_s3_cache", "ObjectStorage", "S3", "Local"), ("disk_s3_other_bucket", "ObjectStorage", "S3", "Local"), ("disk_s3_plain", "ObjectStorage", "S3", "Plain"), + ("disk_s3_plain_rewritable", "ObjectStorage", "S3", "PlainRewritable"), ("disk_s3_restricted_user", "ObjectStorage", "S3", "Local"), ) assert len(expected_disks) == len(disks) @@ -419,25 +420,15 @@ def test_backup_to_s3_multipart(): assert "ReadBufferFromS3RequestsErrors" not in restore_events -def test_backup_to_s3_native_copy(): - storage_policy = "policy_s3" - backup_name = new_backup_name() - backup_destination = ( - f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')" - ) - (backup_events, restore_events) = check_backup_and_restore( - storage_policy, backup_destination - ) - # single part upload - assert backup_events["S3CopyObject"] > 0 - assert restore_events["S3CopyObject"] > 0 - assert node.contains_in_log( - f"copyS3File: Single operation copy has completed. Bucket: root, Key: data/backups/{backup_name}" - ) - - -def test_backup_to_s3_native_copy_other_bucket(): - storage_policy = "policy_s3_other_bucket" +@pytest.mark.parametrize( + "storage_policy", + [ + "policy_s3", + "policy_s3_other_bucket", + "policy_s3_plain_rewritable", + ], +) +def test_backup_to_s3_native_copy(storage_policy): backup_name = new_backup_name() backup_destination = ( f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')" diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py index fc03a77030e..3231fb87f33 100644 --- a/tests/integration/test_backward_compatibility/test_functions.py +++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -75,11 +75,15 @@ def test_aggregate_states(start_cluster): except QueryRuntimeException as e: error_message = str(e) allowed_errors = [ - "NUMBER_OF_ARGUMENTS_DOESNT_MATCH", "ILLEGAL_TYPE_OF_ARGUMENT", # sequenceNextNode() and friends "UNKNOWN_AGGREGATE_FUNCTION", # Function X takes exactly one parameter: + "NUMBER_OF_ARGUMENTS_DOESNT_MATCH", + # Function X takes at least one argument + "TOO_FEW_ARGUMENTS_FOR_FUNCTION", + # Function X accepts at most 3 arguments, Y given + "TOO_MANY_ARGUMENTS_FOR_FUNCTION", # The function 'X' can only be used as a window function "BAD_ARGUMENTS", # aggThrow @@ -196,9 +200,7 @@ def test_string_functions(start_cluster): "Should start with ", # POINT/POLYGON/... "Cannot read input: expected a digit but got something else:", # ErrorCodes - "NUMBER_OF_ARGUMENTS_DOESNT_MATCH", "ILLEGAL_TYPE_OF_ARGUMENT", - "TOO_FEW_ARGUMENTS_FOR_FUNCTION", "DICTIONARIES_WAS_NOT_LOADED", "CANNOT_PARSE_UUID", "CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING", @@ -218,6 +220,11 @@ def test_string_functions(start_cluster): "CANNOT_PARSE_TEXT", "CANNOT_PARSE_DATETIME", # Function X takes exactly one parameter: + "NUMBER_OF_ARGUMENTS_DOESNT_MATCH", + # Function X takes at least one argument + "TOO_FEW_ARGUMENTS_FOR_FUNCTION", + # Function X accepts at most 3 arguments, Y given + "TOO_MANY_ARGUMENTS_FOR_FUNCTION", # The function 'X' can only be used as a window function "BAD_ARGUMENTS", # String foo is obviously not a valid IP address. diff --git a/tests/integration/test_broken_part_during_merge/test.py b/tests/integration/test_broken_part_during_merge/test.py index 19c22201fb0..0ba7beeb1fd 100644 --- a/tests/integration/test_broken_part_during_merge/test.py +++ b/tests/integration/test_broken_part_during_merge/test.py @@ -54,7 +54,7 @@ def test_merge_and_part_corruption(started_cluster): with Pool(1) as p: def optimize_with_delay(x): - node1.query("OPTIMIZE TABLE replicated_mt FINAL", timeout=30) + node1.query("OPTIMIZE TABLE replicated_mt FINAL", timeout=120) # corrupt part after merge already assigned, but not started res_opt = p.apply_async(optimize_with_delay, (1,)) @@ -70,7 +70,7 @@ def test_merge_and_part_corruption(started_cluster): node1.query( "ALTER TABLE replicated_mt UPDATE value = 7 WHERE 1", settings={"mutations_sync": 2}, - timeout=30, + timeout=120, ) assert node1.query("SELECT sum(value) FROM replicated_mt") == "2100000\n" diff --git a/tests/integration/test_broken_projections/test.py b/tests/integration/test_broken_projections/test.py index e198f98e4c5..a565f47449f 100644 --- a/tests/integration/test_broken_projections/test.py +++ b/tests/integration/test_broken_projections/test.py @@ -433,6 +433,7 @@ def test_broken_ignored_replicated(cluster): check(node, table_name, 1) create_table(node, table_name2, 2, table_name) + node.query(f"system sync replica {table_name}") check(node, table_name2, 1) break_projection(node, table_name, "proj1", "all_0_0_0", "data") diff --git a/tests/integration/test_crash_log/test.py b/tests/integration/test_crash_log/test.py index a5b82039a84..5a63e6ca6a7 100644 --- a/tests/integration/test_crash_log/test.py +++ b/tests/integration/test_crash_log/test.py @@ -60,6 +60,13 @@ def test_pkill(started_node): def test_pkill_query_log(started_node): + if ( + started_node.is_built_with_thread_sanitizer() + or started_node.is_built_with_address_sanitizer() + or started_node.is_built_with_memory_sanitizer() + ): + pytest.skip("doesn't fit in timeouts for stacktrace generation") + for signal in ["SEGV", "4"]: # force create query_log if it was not created started_node.query("SYSTEM FLUSH LOGS") diff --git a/tests/integration/test_grant_and_revoke/configs/config.xml b/tests/integration/test_grant_and_revoke/configs/config_with_table_engine_grant.xml similarity index 100% rename from tests/integration/test_grant_and_revoke/configs/config.xml rename to tests/integration/test_grant_and_revoke/configs/config_with_table_engine_grant.xml diff --git a/tests/integration/test_grant_and_revoke/configs/config_without_table_engine_grant.xml b/tests/integration/test_grant_and_revoke/configs/config_without_table_engine_grant.xml new file mode 100644 index 00000000000..d3571f281f5 --- /dev/null +++ b/tests/integration/test_grant_and_revoke/configs/config_without_table_engine_grant.xml @@ -0,0 +1,5 @@ + + + false + + diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test_with_table_engine_grant.py similarity index 99% rename from tests/integration/test_grant_and_revoke/test.py rename to tests/integration/test_grant_and_revoke/test_with_table_engine_grant.py index e533cced1e4..25ca7913e4e 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test_with_table_engine_grant.py @@ -5,7 +5,7 @@ from helpers.test_tools import TSV cluster = ClickHouseCluster(__file__) instance = cluster.add_instance( "instance", - main_configs=["configs/config.xml"], + main_configs=["configs/config_with_table_engine_grant.xml"], user_configs=["configs/users.d/users.xml"], ) diff --git a/tests/integration/test_grant_and_revoke/test_without_table_engine_grant.py b/tests/integration/test_grant_and_revoke/test_without_table_engine_grant.py new file mode 100644 index 00000000000..4a5dfb83f79 --- /dev/null +++ b/tests/integration/test_grant_and_revoke/test_without_table_engine_grant.py @@ -0,0 +1,82 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "instance", + main_configs=["configs/config_without_table_engine_grant.xml"], + user_configs=["configs/users.d/users.xml"], +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + + instance.query("CREATE DATABASE test") + + yield cluster + + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def cleanup_after_test(): + try: + yield + finally: + instance.query("DROP USER IF EXISTS A") + instance.query("DROP TABLE IF EXISTS test.table1") + + +def test_table_engine_and_source_grant(): + instance.query("DROP USER IF EXISTS A") + instance.query("CREATE USER A") + instance.query("GRANT CREATE TABLE ON test.table1 TO A") + + instance.query("GRANT POSTGRES ON *.* TO A") + + instance.query( + """ + CREATE TABLE test.table1(a Integer) + engine=PostgreSQL('localhost:5432', 'dummy', 'dummy', 'dummy', 'dummy'); + """, + user="A", + ) + + instance.query("DROP TABLE test.table1") + + instance.query("REVOKE POSTGRES ON *.* FROM A") + + assert "Not enough privileges" in instance.query_and_get_error( + """ + CREATE TABLE test.table1(a Integer) + engine=PostgreSQL('localhost:5432', 'dummy', 'dummy', 'dummy', 'dummy'); + """, + user="A", + ) + + # expecting grant POSTGRES instead of grant PostgreSQL due to discrepancy between source access type and table engine + # similarily, other sources should also use their own defined name instead of the name of table engine + assert "grant POSTGRES ON *.*" in instance.query_and_get_error( + """ + CREATE TABLE test.table1(a Integer) + engine=PostgreSQL('localhost:5432', 'dummy', 'dummy', 'dummy', 'dummy'); + """, + user="A", + ) + + instance.query("GRANT SOURCES ON *.* TO A") + + instance.query( + """ + CREATE TABLE test.table1(a Integer) + engine=PostgreSQL('localhost:5432', 'dummy', 'dummy', 'dummy', 'dummy'); + """, + user="A", + ) + + instance.query("DROP TABLE test.table1") diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index d7b4230d872..31316af7b1e 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -104,3 +104,24 @@ def test_keeper_map_without_zk(started_cluster): node.query("DETACH TABLE test_keeper_map_without_zk") client.stop() + + +def test_keeper_map_with_failed_drop(started_cluster): + run_query( + "CREATE TABLE test_keeper_map_with_failed_drop (key UInt64, value UInt64) ENGINE = KeeperMap('/test_keeper_map_with_failed_drop') PRIMARY KEY(key);" + ) + + run_query("INSERT INTO test_keeper_map_with_failed_drop VALUES (1, 11)") + run_query("SYSTEM ENABLE FAILPOINT keepermap_fail_drop_data") + node.query("DROP TABLE test_keeper_map_with_failed_drop SYNC") + + zk_client = get_genuine_zk() + assert ( + zk_client.get("/test_keeper_map/test_keeper_map_with_failed_drop/data") + is not None + ) + + run_query("SYSTEM DISABLE FAILPOINT keepermap_fail_drop_data") + run_query( + "CREATE TABLE test_keeper_map_with_failed_drop_another (key UInt64, value UInt64) ENGINE = KeeperMap('/test_keeper_map_with_failed_drop') PRIMARY KEY(key);" + ) diff --git a/tests/integration/test_manipulate_statistics/test.py b/tests/integration/test_manipulate_statistics/test.py index a602cce63df..2541c9b946f 100644 --- a/tests/integration/test_manipulate_statistics/test.py +++ b/tests/integration/test_manipulate_statistics/test.py @@ -167,7 +167,8 @@ def test_replicated_table_ddl(started_cluster): check_stat_file_on_disk(node2, "test_stat", "all_0_0_0_1", "c", False) check_stat_file_on_disk(node2, "test_stat", "all_0_0_0_1", "d", True) node1.query( - "ALTER TABLE test_stat CLEAR STATISTICS d", settings={"alter_sync": "2"} + "ALTER TABLE test_stat CLEAR STATISTICS d", + settings={"alter_sync": "2", "mutations_sync": 2}, ) node1.query( "ALTER TABLE test_stat ADD STATISTICS b type tdigest", @@ -177,7 +178,8 @@ def test_replicated_table_ddl(started_cluster): check_stat_file_on_disk(node2, "test_stat", "all_0_0_0_2", "b", False) check_stat_file_on_disk(node2, "test_stat", "all_0_0_0_2", "d", False) node1.query( - "ALTER TABLE test_stat MATERIALIZE STATISTICS b", settings={"alter_sync": "2"} + "ALTER TABLE test_stat MATERIALIZE STATISTICS b", + settings={"alter_sync": "2", "mutations_sync": 2}, ) check_stat_file_on_disk(node2, "test_stat", "all_0_0_0_3", "a", True) check_stat_file_on_disk(node2, "test_stat", "all_0_0_0_3", "b", True) diff --git a/tests/integration/test_parallel_replicas_no_replicas/test.py b/tests/integration/test_parallel_replicas_no_replicas/test.py index 9f716459643..62d4b005d94 100644 --- a/tests/integration/test_parallel_replicas_no_replicas/test.py +++ b/tests/integration/test_parallel_replicas_no_replicas/test.py @@ -33,7 +33,10 @@ def create_tables(cluster, table_name): @pytest.mark.parametrize("skip_unavailable_shards", [1, 0]) -def test_skip_all_replicas(start_cluster, skip_unavailable_shards): +@pytest.mark.parametrize("max_parallel_replicas", [2, 3, 100]) +def test_skip_all_replicas( + start_cluster, skip_unavailable_shards, max_parallel_replicas +): cluster_name = "test_1_shard_3_unavaliable_replicas" table_name = "tt" create_tables(cluster_name, table_name) @@ -43,7 +46,7 @@ def test_skip_all_replicas(start_cluster, skip_unavailable_shards): f"SELECT key, count() FROM {table_name} GROUP BY key ORDER BY key", settings={ "allow_experimental_parallel_reading_from_replicas": 2, - "max_parallel_replicas": 3, + "max_parallel_replicas": max_parallel_replicas, "cluster_for_parallel_replicas": cluster_name, "skip_unavailable_shards": skip_unavailable_shards, }, diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 5e04c9e4d12..406b50bc486 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -654,7 +654,7 @@ def test_table_override(started_cluster): instance.query(f"SELECT count() FROM {materialized_database}.{table_name}") ) - expected = "CREATE TABLE test_database.table_override\\n(\\n `key` Int32,\\n `value` String,\\n `_sign` Int8() MATERIALIZED 1,\\n `_version` UInt64() MATERIALIZED 1\\n)\\nENGINE = ReplacingMergeTree(_version)\\nPARTITION BY key\\nORDER BY tuple(key)" + expected = "CREATE TABLE test_database.table_override\\n(\\n `key` Int32,\\n `value` String,\\n `_sign` Int8 MATERIALIZED 1,\\n `_version` UInt64 MATERIALIZED 1\\n)\\nENGINE = ReplacingMergeTree(_version)\\nPARTITION BY key\\nORDER BY tuple(key)" assert ( expected == instance.query( diff --git a/tests/integration/test_storage_kerberized_hdfs/__init__.py b/tests/integration/test_restore_external_engines/__init__.py similarity index 100% rename from tests/integration/test_storage_kerberized_hdfs/__init__.py rename to tests/integration/test_restore_external_engines/__init__.py diff --git a/tests/integration/test_restore_external_engines/configs/backups_disk.xml b/tests/integration/test_restore_external_engines/configs/backups_disk.xml new file mode 100644 index 00000000000..f7d666c6542 --- /dev/null +++ b/tests/integration/test_restore_external_engines/configs/backups_disk.xml @@ -0,0 +1,14 @@ + + + + + local + /backups/ + + + + + backups + /backups/ + + diff --git a/tests/integration/test_restore_external_engines/configs/remote_servers.xml b/tests/integration/test_restore_external_engines/configs/remote_servers.xml new file mode 100644 index 00000000000..76ad3618339 --- /dev/null +++ b/tests/integration/test_restore_external_engines/configs/remote_servers.xml @@ -0,0 +1,21 @@ + + + + + true + + replica1 + 9000 + + + replica2 + 9000 + + + replica3 + 9000 + + + + + diff --git a/tests/integration/test_restore_external_engines/test.py b/tests/integration/test_restore_external_engines/test.py new file mode 100644 index 00000000000..cf189f2a6ed --- /dev/null +++ b/tests/integration/test_restore_external_engines/test.py @@ -0,0 +1,218 @@ +import pytest + +import pymysql.cursors +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +configs = ["configs/remote_servers.xml", "configs/backups_disk.xml"] + +node1 = cluster.add_instance( + "replica1", + with_zookeeper=True, + with_mysql8=True, + main_configs=configs, + external_dirs=["/backups/"], +) +node2 = cluster.add_instance( + "replica2", + with_zookeeper=True, + with_mysql8=True, + main_configs=configs, + external_dirs=["/backups/"], +) +node3 = cluster.add_instance( + "replica3", + with_zookeeper=True, + with_mysql8=True, + main_configs=configs, + external_dirs=["/backups/"], +) +nodes = [node1, node2, node3] + +backup_id_counter = 0 + + +def new_backup_name(): + global backup_id_counter + backup_id_counter += 1 + return f"Disk('backups', '{backup_id_counter}/')" + + +def cleanup_nodes(nodes, dbname): + for node in nodes: + node.query(f"DROP DATABASE IF EXISTS {dbname} SYNC") + + +def fill_nodes(nodes, dbname): + cleanup_nodes(nodes, dbname) + for node in nodes: + node.query( + f"CREATE DATABASE {dbname} ENGINE = Replicated('/clickhouse/databases/{dbname}', 'default', '{node.name}')" + ) + + +def drop_mysql_table(conn, tableName): + with conn.cursor() as cursor: + cursor.execute(f"DROP TABLE IF EXISTS `clickhouse`.`{tableName}`") + + +def get_mysql_conn(cluster): + conn = pymysql.connect( + user="root", + password="clickhouse", + host=cluster.mysql8_ip, + port=cluster.mysql8_port, + ) + return conn + + +def fill_tables(cluster, dbname): + fill_nodes(nodes, dbname) + + conn = get_mysql_conn(cluster) + + with conn.cursor() as cursor: + cursor.execute("DROP DATABASE IF EXISTS clickhouse") + cursor.execute("CREATE DATABASE clickhouse") + cursor.execute("DROP TABLE IF EXISTS clickhouse.inference_table") + cursor.execute( + "CREATE TABLE clickhouse.inference_table (id INT PRIMARY KEY, data BINARY(16) NOT NULL)" + ) + cursor.execute( + "INSERT INTO clickhouse.inference_table VALUES (100, X'9fad5e9eefdfb449')" + ) + conn.commit() + + parameters = "'mysql80:3306', 'clickhouse', 'inference_table', 'root', 'clickhouse'" + + node1.query( + f"CREATE TABLE {dbname}.mysql_schema_inference_engine ENGINE=MySQL({parameters})" + ) + node1.query( + f"CREATE TABLE {dbname}.mysql_schema_inference_function AS mysql({parameters})" + ) + + node1.query(f"CREATE TABLE {dbname}.merge_tree (id UInt64, b String) ORDER BY id") + node1.query(f"INSERT INTO {dbname}.merge_tree VALUES (100, 'abc')") + + expected = "id\tInt32\t\t\t\t\t\ndata\tFixedString(16)\t\t\t\t\t\n" + assert ( + node1.query(f"DESCRIBE TABLE {dbname}.mysql_schema_inference_engine") + == expected + ) + assert ( + node1.query(f"DESCRIBE TABLE {dbname}.mysql_schema_inference_function") + == expected + ) + assert node1.query(f"SELECT id FROM mysql({parameters})") == "100\n" + assert ( + node1.query(f"SELECT id FROM {dbname}.mysql_schema_inference_engine") == "100\n" + ) + assert ( + node1.query(f"SELECT id FROM {dbname}.mysql_schema_inference_function") + == "100\n" + ) + assert node1.query(f"SELECT id FROM {dbname}.merge_tree") == "100\n" + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + + except Exception as ex: + print(ex) + + finally: + cluster.shutdown() + + +def test_restore_table(start_cluster): + fill_tables(cluster, "replicated") + backup_name = new_backup_name() + node2.query(f"SYSTEM SYNC DATABASE REPLICA replicated") + + node2.query(f"BACKUP DATABASE replicated TO {backup_name}") + + node2.query("DROP TABLE replicated.mysql_schema_inference_engine") + node2.query("DROP TABLE replicated.mysql_schema_inference_function") + + node3.query(f"SYSTEM SYNC DATABASE REPLICA replicated") + + assert node3.query("EXISTS replicated.mysql_schema_inference_engine") == "0\n" + assert node3.query("EXISTS replicated.mysql_schema_inference_function") == "0\n" + + node3.query( + f"RESTORE DATABASE replicated FROM {backup_name} SETTINGS allow_different_database_def=true" + ) + node1.query(f"SYSTEM SYNC DATABASE REPLICA replicated") + + assert ( + node1.query( + "SELECT count(), sum(id) FROM replicated.mysql_schema_inference_engine" + ) + == "1\t100\n" + ) + assert ( + node1.query( + "SELECT count(), sum(id) FROM replicated.mysql_schema_inference_function" + ) + == "1\t100\n" + ) + assert ( + node1.query("SELECT count(), sum(id) FROM replicated.merge_tree") == "1\t100\n" + ) + cleanup_nodes(nodes, "replicated") + + +def test_restore_table_null(start_cluster): + fill_tables(cluster, "replicated2") + + backup_name = new_backup_name() + node2.query(f"SYSTEM SYNC DATABASE REPLICA replicated2") + + node2.query(f"BACKUP DATABASE replicated2 TO {backup_name}") + + node2.query("DROP TABLE replicated2.mysql_schema_inference_engine") + node2.query("DROP TABLE replicated2.mysql_schema_inference_function") + + node3.query(f"SYSTEM SYNC DATABASE REPLICA replicated2") + + assert node3.query("EXISTS replicated2.mysql_schema_inference_engine") == "0\n" + assert node3.query("EXISTS replicated2.mysql_schema_inference_function") == "0\n" + + node3.query( + f"RESTORE DATABASE replicated2 FROM {backup_name} SETTINGS allow_different_database_def=1, allow_different_table_def=1 SETTINGS restore_replace_external_engines_to_null=1, restore_replace_external_table_functions_to_null=1" + ) + node1.query(f"SYSTEM SYNC DATABASE REPLICA replicated2") + + assert ( + node1.query( + "SELECT count(), sum(id) FROM replicated2.mysql_schema_inference_engine" + ) + == "0\t0\n" + ) + assert ( + node1.query( + "SELECT count(), sum(id) FROM replicated2.mysql_schema_inference_function" + ) + == "0\t0\n" + ) + assert ( + node1.query("SELECT count(), sum(id) FROM replicated2.merge_tree") == "1\t100\n" + ) + assert ( + node1.query( + "SELECT engine FROM system.tables where database = 'replicated2' and name like '%mysql%'" + ) + == "Null\nNull\n" + ) + assert ( + node1.query( + "SELECT engine FROM system.tables where database = 'replicated2' and name like '%merge_tree%'" + ) + == "MergeTree\n" + ) + cleanup_nodes(nodes, "replicated2") diff --git a/tests/integration/test_runtime_configurable_cache_size/configs/smaller_query_cache.xml b/tests/integration/test_runtime_configurable_cache_size/configs/empty_query_cache.xml similarity index 64% rename from tests/integration/test_runtime_configurable_cache_size/configs/smaller_query_cache.xml rename to tests/integration/test_runtime_configurable_cache_size/configs/empty_query_cache.xml index 6f2de0fa8f5..c4872a0ce41 100644 --- a/tests/integration/test_runtime_configurable_cache_size/configs/smaller_query_cache.xml +++ b/tests/integration/test_runtime_configurable_cache_size/configs/empty_query_cache.xml @@ -1,7 +1,7 @@ - 1 + 0 diff --git a/tests/integration/test_runtime_configurable_cache_size/test.py b/tests/integration/test_runtime_configurable_cache_size/test.py index f761005f297..beaf83ea754 100644 --- a/tests/integration/test_runtime_configurable_cache_size/test.py +++ b/tests/integration/test_runtime_configurable_cache_size/test.py @@ -94,54 +94,61 @@ CONFIG_DIR = os.path.join(SCRIPT_DIR, "configs") def test_query_cache_size_is_runtime_configurable(start_cluster): - # the initial config specifies the maximum query cache size as 2, run 3 queries, expect 2 cache entries node.query("SYSTEM DROP QUERY CACHE") + + # The initial config allows at most two query cache entries but we don't mind node.query("SELECT 1 SETTINGS use_query_cache = 1, query_cache_ttl = 1") - node.query("SELECT 2 SETTINGS use_query_cache = 1, query_cache_ttl = 1") - node.query("SELECT 3 SETTINGS use_query_cache = 1, query_cache_ttl = 1") time.sleep(2) - node.query("SYSTEM RELOAD ASYNCHRONOUS METRICS") - res = node.query( - "SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'", - ) - assert res == "2\n" + # At this point, the query cache contains one entry and it is stale - # switch to a config with a maximum query cache size of 1 + res = node.query( + "SELECT count(*) FROM system.query_cache", + ) + assert res == "1\n" + + # switch to a config with a maximum query cache size of _0_ node.copy_file_to_container( - os.path.join(CONFIG_DIR, "smaller_query_cache.xml"), + os.path.join(CONFIG_DIR, "empty_query_cache.xml"), "/etc/clickhouse-server/config.d/default.xml", ) node.query("SYSTEM RELOAD CONFIG") - # check that eviction worked as expected - time.sleep(2) - node.query("SYSTEM RELOAD ASYNCHRONOUS METRICS") res = node.query( - "SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'", - ) - assert ( - res == "2\n" - ) # "Why not 1?", you think. Reason is that QC uses the TTLCachePolicy that evicts lazily only upon insert. - # Not a real issue, can be changed later, at least there's a test now. - - # Also, you may also wonder "why query_cache_ttl = 1"? Reason is that TTLCachePolicy only removes *stale* entries. With the default TTL - # (60 sec), no entries would be removed at all. Again: not a real issue, can be changed later and there's at least a test now. - - # check that the new query cache maximum size is respected when more queries run - node.query("SELECT 4 SETTINGS use_query_cache = 1, query_cache_ttl = 1") - node.query("SELECT 5 SETTINGS use_query_cache = 1, query_cache_ttl = 1") - - time.sleep(2) - node.query("SYSTEM RELOAD ASYNCHRONOUS METRICS") - res = node.query( - "SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'", + "SELECT count(*) FROM system.query_cache", ) assert res == "1\n" + # "Why not 0?", I hear you say. Reason is that QC uses the TTLCachePolicy that evicts lazily only upon insert. + # Not a real issue, can be changed later, at least there's a test now. - # restore the original config + # The next SELECT will find a single stale entry which is one entry too much according to the new config. + # This triggers the eviction of all stale entries, in this case the 'SELECT 1' result. + # Then, it tries to insert the 'SELECT 2' result but it also cannot be added according to the config. + node.query("SELECT 2 SETTINGS use_query_cache = 1, query_cache_ttl = 1") + res = node.query( + "SELECT count(*) FROM system.query_cache", + ) + assert res == "0\n" + + # The new maximum cache size is respected when more queries run + node.query("SELECT 3 SETTINGS use_query_cache = 1, query_cache_ttl = 1") + res = node.query( + "SELECT count(*) FROM system.query_cache", + ) + assert res == "0\n" + + # Restore the original config node.copy_file_to_container( os.path.join(CONFIG_DIR, "default.xml"), "/etc/clickhouse-server/config.d/default.xml", ) + + node.query("SYSTEM RELOAD CONFIG") + + # It is possible to insert entries again + node.query("SELECT 4 SETTINGS use_query_cache = 1, query_cache_ttl = 1") + res = node.query( + "SELECT count(*) FROM system.query_cache", + ) + assert res == "1\n" diff --git a/tests/integration/test_shutdown_static_destructor_failure/__init__.py b/tests/integration/test_shutdown_static_destructor_failure/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_shutdown_static_destructor_failure/test.py b/tests/integration/test_shutdown_static_destructor_failure/test.py new file mode 100644 index 00000000000..b1d925cc432 --- /dev/null +++ b/tests/integration/test_shutdown_static_destructor_failure/test.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance("node", main_configs=[], stay_alive=True) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_shutdown(): + node.query("SYSTEM ENABLE FAILPOINT lazy_pipe_fds_fail_close") + node.stop_clickhouse() diff --git a/tests/integration/test_ssl_cert_authentication/test.py b/tests/integration/test_ssl_cert_authentication/test.py index 756a1e1996c..3af88759e82 100644 --- a/tests/integration/test_ssl_cert_authentication/test.py +++ b/tests/integration/test_ssl_cert_authentication/test.py @@ -43,15 +43,10 @@ def started_cluster(): config = """ - none - + strict {certificateFile} {privateKeyFile} {caConfig} - - - AcceptCertificateHandler - """ diff --git a/tests/integration/test_storage_kerberized_hdfs/configs/hdfs.xml b/tests/integration/test_storage_kerberized_hdfs/configs/hdfs.xml deleted file mode 100644 index 2e9dc6cc06c..00000000000 --- a/tests/integration/test_storage_kerberized_hdfs/configs/hdfs.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - /tmp/keytab/clickhouse.keytab - root@TEST.CLICKHOUSE.TECH - kerberos - - - specuser@TEST.CLICKHOUSE.TECH - - - /tmp/kerb_cache - - diff --git a/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh b/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh deleted file mode 100755 index db6921bc1c8..00000000000 --- a/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh +++ /dev/null @@ -1,280 +0,0 @@ -#!/bin/bash - -: "${HADOOP_PREFIX:=/usr/local/hadoop}" - -cat >> $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh < /usr/local/hadoop/etc/hadoop/core-site.xml - -cat >> /usr/local/hadoop/etc/hadoop/core-site.xml << EOF - - hadoop.security.authentication - kerberos - - - hadoop.security.authorization - true - - - fs.defaultFS - hdfs://kerberizedhdfs1:9010 - - - fs.default.name - hdfs://kerberizedhdfs1:9010 - - - -EOF - - -cat > /usr/local/hadoop/etc/hadoop/hdfs-site.xml << EOF - - - - dfs.replication - 1 - - - - dfs.block.access.token.enable - true - - - - dfs.namenode.keytab.file - /usr/local/hadoop/etc/hadoop/conf/hdfs.keytab - - - dfs.namenode.kerberos.principal - hdfs/_HOST@TEST.CLICKHOUSE.TECH - - - dfs.namenode.kerberos.internal.spnego.principal - HTTP/_HOST@TEST.CLICKHOUSE.TECH - - - - dfs.secondary.namenode.keytab.file - /usr/local/hadoop/etc/hadoop/conf/hdfs.keytab - - - dfs.secondary.namenode.kerberos.principal - hdfs/_HOST@TEST.CLICKHOUSE.TECH - - - dfs.secondary.namenode.kerberos.internal.spnego.principal - HTTP/_HOST@TEST.CLICKHOUSE.TECH - - - - dfs.datanode.data.dir.perm - 700 - - - dfs.datanode.address - 0.0.0.0:1004 - - - dfs.datanode.http.address - 0.0.0.0:1006 - - - - dfs.datanode.ipc.address - 0.0.0.0:0 - - - dfs.namenode.secondary.http-address - 0.0.0.0:0 - - - dfs.namenode.backup.address - 0.0.0.0:0 - - - dfs.namenode.backup.http-address - 0.0.0.0:0 - - - - - - dfs.datanode.keytab.file - /usr/local/hadoop/etc/hadoop/conf/hdfs.keytab - - - dfs.datanode.kerberos.principal - hdfs/_HOST@TEST.CLICKHOUSE.TECH - - - - - dfs.webhdfs.enabled - true - - -dfs.encrypt.data.transfer -false - - - dfs.web.authentication.kerberos.principal - HTTP/_HOST@TEST.CLICKHOUSE.TECH - - - dfs.web.authentication.kerberos.keytab - /usr/local/hadoop/etc/hadoop/conf/hdfs.keytab - - -EOF - - - -# cat > /usr/local/hadoop/etc/hadoop/ssl-server.xml << EOF -# -# -# ssl.server.truststore.location -# /usr/local/hadoop/etc/hadoop/conf/hdfs.jks -# -# -# ssl.server.truststore.password -# masterkey -# -# -# ssl.server.keystore.location -# /usr/local/hadoop/etc/hadoop/conf/hdfs.jks -# -# -# ssl.server.keystore.password -# masterkey -# -# -# ssl.server.keystore.keypassword -# masterkey -# -# -# EOF - -cat > /usr/local/hadoop/etc/hadoop/log4j.properties << EOF -# Set everything to be logged to the console -log4j.rootCategory=DEBUG, console -log4j.appender.console=org.apache.log4j.ConsoleAppender -log4j.appender.console.target=System.err -log4j.appender.console.layout=org.apache.log4j.PatternLayout -log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n - -# Set the default spark-shell log level to WARN. When running the spark-shell, the -# log level for this class is used to overwrite the root logger's log level, so that -# the user can have different defaults for the shell and regular Spark apps. -log4j.logger.org.apache.spark.repl.Main=INFO - -# Settings to quiet third party logs that are too verbose -log4j.logger.org.spark_project.jetty=DEBUG -log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR -log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO -log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO -log4j.logger.org.apache.parquet=ERROR -log4j.logger.parquet=ERROR - -# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support -log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL -log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR - -log4j.logger.org.apache.spark.deploy=DEBUG -log4j.logger.org.apache.spark.executor=DEBUG -log4j.logger.org.apache.spark.scheduler=DEBUG -EOF - -useradd -u 1098 hdfs - -# keytool -genkey -alias kerberized_hdfs1.test.clickhouse.com -keyalg rsa -keysize 1024 -dname "CN=kerberized_hdfs1.test.clickhouse.com" -keypass masterkey -keystore /usr/local/hadoop/etc/hadoop/conf/hdfs.jks -storepass masterkey -keytool -genkey -alias kerberizedhdfs1 -keyalg rsa -keysize 1024 -dname "CN=kerberizedhdfs1" -keypass masterkey -keystore /usr/local/hadoop/etc/hadoop/conf/hdfs.jks -storepass masterkey - -chmod g+r /usr/local/hadoop/etc/hadoop/conf/hdfs.jks - - -service sshd start - -# yum --quiet --assumeyes install krb5-workstation.x86_64 -# yum --quiet --assumeyes install tcpdump - -# cd /tmp -# curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz -# tar xzf commons-daemon-1.0.15-src.tar.gz -# cd commons-daemon-1.0.15-src/src/native/unix -# ./configure && make -# cp ./jsvc /usr/local/hadoop/sbin - - -until kinit -kt /usr/local/hadoop/etc/hadoop/conf/hdfs.keytab hdfs/kerberizedhdfs1@TEST.CLICKHOUSE.TECH; do sleep 2; done -echo "KDC is up and ready to go... starting up" - -$HADOOP_PREFIX/sbin/start-dfs.sh -$HADOOP_PREFIX/sbin/start-yarn.sh -$HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh start historyserver - -chmod a+r /usr/local/hadoop/etc/hadoop/conf/hdfs.keytab # create dedicated keytab for hdfsuser - -$HADOOP_PREFIX/sbin/start-secure-dns.sh -sleep 3 - -/usr/local/hadoop/bin/hdfs dfsadmin -safemode leave - -/usr/local/hadoop/bin/hdfs dfs -mkdir /user/specuser -/usr/local/hadoop/bin/hdfs dfs -chown specuser /user/specuser -echo "chown_completed" | /usr/local/hadoop/bin/hdfs dfs -appendToFile - /preparations_done_marker - -kdestroy - - - -# adduser --groups hdfs hdfsuser - -# /usr/local/hadoop/sbin/hadoop-daemon.sh --config /usr/local/hadoop/etc/hadoop/ --script /usr/local/hadoop/sbin/hdfs start namenode -# /usr/local/hadoop/sbin/hadoop-daemon.sh --config /usr/local/hadoop/etc/hadoop/ --script /usr/local/hadoop/sbin/hdfs start datanode - - -if [[ $1 == "-d" ]]; then - while true; do sleep 1000; done -fi - -if [[ $1 == "-bash" ]]; then - /bin/bash -fi diff --git a/tests/integration/test_storage_kerberized_hdfs/kerberos_image_config.sh b/tests/integration/test_storage_kerberized_hdfs/kerberos_image_config.sh deleted file mode 100644 index 45fb93792e0..00000000000 --- a/tests/integration/test_storage_kerberized_hdfs/kerberos_image_config.sh +++ /dev/null @@ -1,140 +0,0 @@ -#!/bin/bash - - -set -x # trace - -: "${REALM:=TEST.CLICKHOUSE.TECH}" -: "${DOMAIN_REALM:=test.clickhouse.com}" -: "${KERB_MASTER_KEY:=masterkey}" -: "${KERB_ADMIN_USER:=admin}" -: "${KERB_ADMIN_PASS:=admin}" - -create_config() { - : "${KDC_ADDRESS:=$(hostname -f)}" - - cat>/etc/krb5.conf</var/kerberos/krb5kdc/kdc.conf< /var/kerberos/krb5kdc/kadm5.acl -} - -create_keytabs() { - rm /tmp/keytab/*.keytab - - - # kadmin.local -q "addprinc -randkey hdfs/kerberizedhdfs1.${DOMAIN_REALM}@${REALM}" - # kadmin.local -q "ktadd -norandkey -k /tmp/keytab/hdfs.keytab hdfs/kerberizedhdfs1.${DOMAIN_REALM}@${REALM}" - - # kadmin.local -q "addprinc -randkey HTTP/kerberizedhdfs1.${DOMAIN_REALM}@${REALM}" - # kadmin.local -q "ktadd -norandkey -k /tmp/keytab/hdfs.keytab HTTP/kerberizedhdfs1.${DOMAIN_REALM}@${REALM}" - kadmin.local -q "addprinc -randkey hdfs/kerberizedhdfs1@${REALM}" - kadmin.local -q "ktadd -norandkey -k /tmp/keytab/hdfs.keytab hdfs/kerberizedhdfs1@${REALM}" - - kadmin.local -q "addprinc -randkey HTTP/kerberizedhdfs1@${REALM}" - kadmin.local -q "ktadd -norandkey -k /tmp/keytab/hdfs.keytab HTTP/kerberizedhdfs1@${REALM}" - - kadmin.local -q "addprinc -randkey hdfsuser/node1@${REALM}" - kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse.keytab hdfsuser/node1@${REALM}" - kadmin.local -q "addprinc -randkey hdfsuser@${REALM}" - kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse.keytab hdfsuser@${REALM}" - kadmin.local -q "addprinc -randkey root@${REALM}" - kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse.keytab root@${REALM}" - kadmin.local -q "addprinc -randkey specuser@${REALM}" - kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse.keytab specuser@${REALM}" - - chmod g+r /tmp/keytab/clickhouse.keytab -} - -main() { - - if [ ! -f /kerberos_initialized ]; then - create_config - create_db - create_admin_user - start_kdc - - touch /kerberos_initialized - fi - - if [ ! -f /var/kerberos/krb5kdc/principal ]; then - while true; do sleep 1000; done - else - start_kdc - create_keytabs - tail -F /var/log/kerberos/krb5kdc.log - fi - -} - -[[ "$0" == "${BASH_SOURCE[0]}" ]] && main "$@" diff --git a/tests/integration/test_storage_kerberized_hdfs/secrets/krb.conf b/tests/integration/test_storage_kerberized_hdfs/secrets/krb.conf deleted file mode 100644 index dffdcaebe81..00000000000 --- a/tests/integration/test_storage_kerberized_hdfs/secrets/krb.conf +++ /dev/null @@ -1,25 +0,0 @@ -[logging] - default = FILE:/var/log/krb5libs.log - kdc = FILE:/var/log/krb5kdc.log - admin_server = FILE:/var/log/kadmind.log - -[libdefaults] - default_realm = TEST.CLICKHOUSE.TECH - dns_lookup_realm = false - dns_lookup_kdc = false - ticket_lifetime = 5s - forwardable = true - rdns = false - default_tgs_enctypes = des3-hmac-sha1 - default_tkt_enctypes = des3-hmac-sha1 - permitted_enctypes = des3-hmac-sha1 - -[realms] - TEST.CLICKHOUSE.TECH = { - kdc = hdfskerberos - admin_server = hdfskerberos - } - -[domain_realm] - .test.clickhouse.com = TEST.CLICKHOUSE.TECH - test.clickhouse.com = TEST.CLICKHOUSE.TECH diff --git a/tests/integration/test_storage_kerberized_hdfs/secrets/krb_long.conf b/tests/integration/test_storage_kerberized_hdfs/secrets/krb_long.conf deleted file mode 100644 index 43c009d2e98..00000000000 --- a/tests/integration/test_storage_kerberized_hdfs/secrets/krb_long.conf +++ /dev/null @@ -1,24 +0,0 @@ -[logging] - default = FILE:/var/log/krb5libs.log - kdc = FILE:/var/log/krb5kdc.log - admin_server = FILE:/var/log/kadmind.log - -[libdefaults] - default_realm = TEST.CLICKHOUSE.TECH - dns_lookup_realm = false - dns_lookup_kdc = false - ticket_lifetime = 15d - forwardable = true - default_tgs_enctypes = des3-hmac-sha1 - default_tkt_enctypes = des3-hmac-sha1 - permitted_enctypes = des3-hmac-sha1 - -[realms] - TEST.CLICKHOUSE.TECH = { - kdc = hdfskerberos - admin_server = hdfskerberos - } - -[domain_realm] - .test.clickhouse.com = TEST.CLICKHOUSE.TECH - test.clickhouse.com = TEST.CLICKHOUSE.TECH diff --git a/tests/integration/test_storage_kerberized_hdfs/test.py b/tests/integration/test_storage_kerberized_hdfs/test.py deleted file mode 100644 index ddfc1f6483d..00000000000 --- a/tests/integration/test_storage_kerberized_hdfs/test.py +++ /dev/null @@ -1,155 +0,0 @@ -import time -import pytest - -import os - -from helpers.cluster import ClickHouseCluster, is_arm -import subprocess - -if is_arm(): - pytestmark = pytest.mark.skip - -cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance( - "node1", - with_kerberized_hdfs=True, - user_configs=[], - main_configs=["configs/hdfs.xml"], -) - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - - yield cluster - - except Exception as ex: - print(ex) - raise ex - finally: - cluster.shutdown() - - -def test_read_table(started_cluster): - hdfs_api = started_cluster.hdfs_api - - data = "1\tSerialize\t555.222\n2\tData\t777.333\n" - hdfs_api.write_data("/simple_table_function", data) - - api_read = hdfs_api.read_data("/simple_table_function") - assert api_read == data - - select_read = node1.query( - "select * from hdfs('hdfs://kerberizedhdfs1:9010/simple_table_function', 'TSV', 'id UInt64, text String, number Float64')" - ) - assert select_read == data - - -def test_read_write_storage(started_cluster): - hdfs_api = started_cluster.hdfs_api - - node1.query( - "create table SimpleHDFSStorage2 (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://kerberizedhdfs1:9010/simple_storage1', 'TSV')" - ) - node1.query("insert into SimpleHDFSStorage2 values (1, 'Mark', 72.53)") - - api_read = hdfs_api.read_data("/simple_storage1") - assert api_read == "1\tMark\t72.53\n" - - select_read = node1.query("select * from SimpleHDFSStorage2") - assert select_read == "1\tMark\t72.53\n" - - -def test_write_storage_not_expired(started_cluster): - hdfs_api = started_cluster.hdfs_api - - node1.query( - "create table SimpleHDFSStorageNotExpired (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://kerberizedhdfs1:9010/simple_storage_not_expired', 'TSV')" - ) - - time.sleep(15) # wait for ticket expiration - node1.query("insert into SimpleHDFSStorageNotExpired values (1, 'Mark', 72.53)") - - api_read = hdfs_api.read_data("/simple_storage_not_expired") - assert api_read == "1\tMark\t72.53\n" - - select_read = node1.query("select * from SimpleHDFSStorageNotExpired") - assert select_read == "1\tMark\t72.53\n" - - -def test_two_users(started_cluster): - hdfs_api = started_cluster.hdfs_api - - node1.query( - "create table HDFSStorOne (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://kerberizedhdfs1:9010/storage_user_one', 'TSV')" - ) - node1.query("insert into HDFSStorOne values (1, 'Real', 86.00)") - - node1.query( - "create table HDFSStorTwo (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://suser@kerberizedhdfs1:9010/user/specuser/storage_user_two', 'TSV')" - ) - node1.query("insert into HDFSStorTwo values (1, 'Ideal', 74.00)") - - select_read_1 = node1.query( - "select * from hdfs('hdfs://kerberizedhdfs1:9010/user/specuser/storage_user_two', 'TSV', 'id UInt64, text String, number Float64')" - ) - - select_read_2 = node1.query( - "select * from hdfs('hdfs://suser@kerberizedhdfs1:9010/storage_user_one', 'TSV', 'id UInt64, text String, number Float64')" - ) - - -def test_read_table_expired(started_cluster): - hdfs_api = started_cluster.hdfs_api - - data = "1\tSerialize\t555.222\n2\tData\t777.333\n" - hdfs_api.write_data("/simple_table_function_relogin", data) - - started_cluster.pause_container("hdfskerberos") - time.sleep(15) - - try: - select_read = node1.query( - "select * from hdfs('hdfs://reloginuser&kerberizedhdfs1:9010/simple_table_function', 'TSV', 'id UInt64, text String, number Float64')" - ) - assert False, "Exception have to be thrown" - except Exception as ex: - assert "DB::Exception: KerberosInit failure:" in str(ex) - - started_cluster.unpause_container("hdfskerberos") - - -def test_prohibited(started_cluster): - node1.query( - "create table HDFSStorTwoProhibited (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://suser@kerberizedhdfs1:9010/storage_user_two_prohibited', 'TSV')" - ) - try: - node1.query("insert into HDFSStorTwoProhibited values (1, 'SomeOne', 74.00)") - assert False, "Exception have to be thrown" - except Exception as ex: - assert ( - "Unable to open HDFS file: /storage_user_two_prohibited (hdfs://suser@kerberizedhdfs1:9010/storage_user_two_prohibited) error: Permission denied: user=specuser, access=WRITE" - in str(ex) - ) - - -def test_cache_path(started_cluster): - node1.query( - "create table HDFSStorCachePath (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://dedicatedcachepath@kerberizedhdfs1:9010/storage_dedicated_cache_path', 'TSV')" - ) - try: - node1.query("insert into HDFSStorCachePath values (1, 'FatMark', 92.53)") - assert False, "Exception have to be thrown" - except Exception as ex: - assert ( - "DB::Exception: hadoop.security.kerberos.ticket.cache.path cannot be set per user" - in str(ex) - ) - - -if __name__ == "__main__": - cluster.start() - input("Cluster created, press any key to destroy...") - cluster.shutdown() diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 40cbf4b44a6..ab327afe90b 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -154,6 +154,7 @@ def test_put(started_cluster, maybe_auth, positive, compression): def test_partition_by(started_cluster): + id = uuid.uuid4() bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] # type: ClickHouseInstance table_format = "column1 UInt32, column2 UInt32, column3 UInt32" @@ -161,26 +162,37 @@ def test_partition_by(started_cluster): values = "(1, 2, 3), (3, 2, 1), (78, 43, 45)" filename = "test_{_partition_id}.csv" put_query = f"""INSERT INTO TABLE FUNCTION - s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{filename}', 'CSV', '{table_format}') + s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{id}/{filename}', 'CSV', '{table_format}') PARTITION BY {partition_by} VALUES {values}""" run_query(instance, put_query) - assert "1,2,3\n" == get_s3_file_content(started_cluster, bucket, "test_3.csv") - assert "3,2,1\n" == get_s3_file_content(started_cluster, bucket, "test_1.csv") - assert "78,43,45\n" == get_s3_file_content(started_cluster, bucket, "test_45.csv") + assert "1,2,3\n" == get_s3_file_content(started_cluster, bucket, f"{id}/test_3.csv") + assert "3,2,1\n" == get_s3_file_content(started_cluster, bucket, f"{id}/test_1.csv") + assert "78,43,45\n" == get_s3_file_content( + started_cluster, bucket, f"{id}/test_45.csv" + ) filename = "test2_{_partition_id}.csv" instance.query( - f"create table p ({table_format}) engine=S3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{filename}', 'CSV') partition by column3" + f"create table p ({table_format}) engine=S3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{id}/{filename}', 'CSV') partition by column3" ) instance.query(f"insert into p values {values}") - assert "1,2,3\n" == get_s3_file_content(started_cluster, bucket, "test2_3.csv") - assert "3,2,1\n" == get_s3_file_content(started_cluster, bucket, "test2_1.csv") - assert "78,43,45\n" == get_s3_file_content(started_cluster, bucket, "test2_45.csv") + assert "1,2,3\n" == get_s3_file_content( + started_cluster, bucket, f"{id}/test2_3.csv" + ) + assert "3,2,1\n" == get_s3_file_content( + started_cluster, bucket, f"{id}/test2_1.csv" + ) + assert "78,43,45\n" == get_s3_file_content( + started_cluster, bucket, f"{id}/test2_45.csv" + ) + + instance.query("drop table p") def test_partition_by_string_column(started_cluster): + id = uuid.uuid4() bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] # type: ClickHouseInstance table_format = "col_num UInt32, col_str String" @@ -188,21 +200,24 @@ def test_partition_by_string_column(started_cluster): values = "(1, 'foo/bar'), (3, 'йцук'), (78, '你好')" filename = "test_{_partition_id}.csv" put_query = f"""INSERT INTO TABLE FUNCTION - s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{filename}', 'CSV', '{table_format}') + s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{id}/{filename}', 'CSV', '{table_format}') PARTITION BY {partition_by} VALUES {values}""" run_query(instance, put_query) assert '1,"foo/bar"\n' == get_s3_file_content( - started_cluster, bucket, "test_foo/bar.csv" + started_cluster, bucket, f"{id}/test_foo/bar.csv" + ) + assert '3,"йцук"\n' == get_s3_file_content( + started_cluster, bucket, f"{id}/test_йцук.csv" ) - assert '3,"йцук"\n' == get_s3_file_content(started_cluster, bucket, "test_йцук.csv") assert '78,"你好"\n' == get_s3_file_content( - started_cluster, bucket, "test_你好.csv" + started_cluster, bucket, f"{id}/test_你好.csv" ) def test_partition_by_const_column(started_cluster): + id = uuid.uuid4() bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] # type: ClickHouseInstance table_format = "column1 UInt32, column2 UInt32, column3 UInt32" @@ -211,12 +226,14 @@ def test_partition_by_const_column(started_cluster): values_csv = "1,2,3\n3,2,1\n78,43,45\n" filename = "test_{_partition_id}.csv" put_query = f"""INSERT INTO TABLE FUNCTION - s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{filename}', 'CSV', '{table_format}') + s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{id}/{filename}', 'CSV', '{table_format}') PARTITION BY {partition_by} VALUES {values}""" run_query(instance, put_query) - assert values_csv == get_s3_file_content(started_cluster, bucket, "test_88.csv") + assert values_csv == get_s3_file_content( + started_cluster, bucket, f"{id}/test_88.csv" + ) @pytest.mark.parametrize("special", ["space", "plus"]) @@ -276,46 +293,31 @@ def test_get_path_with_special(started_cluster, special): @pytest.mark.parametrize("auth", [pytest.param("'minio','minio123',", id="minio")]) def test_empty_put(started_cluster, auth): # type: (ClickHouseCluster, str) -> None - + id = uuid.uuid4() bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] # type: ClickHouseInstance table_format = "column1 UInt32, column2 UInt32, column3 UInt32" drop_empty_table_query = "DROP TABLE IF EXISTS empty_table" - create_empty_table_query = """ - CREATE TABLE empty_table ( - {} - ) ENGINE = Null() - """.format( - table_format + create_empty_table_query = ( + f"CREATE TABLE empty_table ({table_format}) ENGINE = Null()" ) run_query(instance, drop_empty_table_query) run_query(instance, create_empty_table_query) filename = "empty_put_test.csv" - put_query = "insert into table function s3('http://{}:{}/{}/{}', {}'CSV', '{}') select * from empty_table".format( - started_cluster.minio_ip, - MINIO_INTERNAL_PORT, - bucket, - filename, - auth, - table_format, - ) + put_query = f"""insert into table function + s3('http://{started_cluster.minio_ip}:{MINIO_INTERNAL_PORT}/{bucket}/{id}/{filename}', {auth} 'CSV', '{table_format}') + select * from empty_table""" run_query(instance, put_query) assert ( run_query( instance, - "select count(*) from s3('http://{}:{}/{}/{}', {}'CSV', '{}')".format( - started_cluster.minio_ip, - MINIO_INTERNAL_PORT, - bucket, - filename, - auth, - table_format, - ), + f"""select count(*) from + s3('http://{started_cluster.minio_ip}:{MINIO_INTERNAL_PORT}/{bucket}/{id}/{filename}', {auth} 'CSV', '{table_format}')""", ) == "0\n" ) @@ -499,6 +501,7 @@ def test_put_get_with_globs(started_cluster): def test_multipart(started_cluster, maybe_auth, positive): # type: (ClickHouseCluster, str, bool) -> None + id = uuid.uuid4() bucket = ( started_cluster.minio_bucket if not maybe_auth @@ -521,7 +524,7 @@ def test_multipart(started_cluster, maybe_auth, positive): assert len(csv_data) > min_part_size_bytes - filename = "test_multipart.csv" + filename = f"{id}/test_multipart.csv" put_query = "insert into table function s3('http://{}:{}/{}/{}', {}'CSV', '{}') format CSV".format( started_cluster.minio_redirect_host, started_cluster.minio_redirect_port, @@ -693,7 +696,7 @@ def test_s3_glob_many_objects_under_selection(started_cluster): def create_files(thread_num): for f_num in range(thread_num * 63, thread_num * 63 + 63): path = f"folder1/file{f_num}.csv" - query = "insert into table function s3('http://{}:{}/{}/{}', 'CSV', '{}') values {}".format( + query = "insert into table function s3('http://{}:{}/{}/{}', 'CSV', '{}') settings s3_truncate_on_insert=1 values {}".format( started_cluster.minio_ip, MINIO_INTERNAL_PORT, bucket, @@ -706,7 +709,7 @@ def test_s3_glob_many_objects_under_selection(started_cluster): jobs.append(threading.Thread(target=create_files, args=(thread_num,))) jobs[-1].start() - query = "insert into table function s3('http://{}:{}/{}/{}', 'CSV', '{}') values {}".format( + query = "insert into table function s3('http://{}:{}/{}/{}', 'CSV', '{}') settings s3_truncate_on_insert=1 values {}".format( started_cluster.minio_ip, MINIO_INTERNAL_PORT, bucket, @@ -881,7 +884,7 @@ def test_storage_s3_get_unstable(started_cluster): bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] table_format = "column1 Int64, column2 Int64, column3 Int64, column4 Int64" - get_query = f"SELECT count(), sum(column3), sum(column4) FROM s3('http://resolver:8081/{started_cluster.minio_bucket}/test.csv', 'CSV', '{table_format}') FORMAT CSV" + get_query = f"SELECT count(), sum(column3), sum(column4) FROM s3('http://resolver:8081/{started_cluster.minio_bucket}/test.csv', 'CSV', '{table_format}') SETTINGS s3_max_single_read_retries=30 FORMAT CSV" result = run_query(instance, get_query) assert result.splitlines() == ["500001,500000,0"] @@ -896,9 +899,10 @@ def test_storage_s3_get_slow(started_cluster): def test_storage_s3_put_uncompressed(started_cluster): + id = uuid.uuid4() bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] - filename = "test_put_uncompressed.bin" + filename = f"{id}/test_put_uncompressed.bin" name = "test_put_uncompressed" data = [ "'Gloria Thompson',99", @@ -950,6 +954,7 @@ def test_storage_s3_put_uncompressed(started_cluster): r = result.strip().split("\t") assert int(r[0]) >= 1, blob_storage_log assert all(col == r[0] for col in r), blob_storage_log + run_query(instance, f"DROP TABLE {name}") @pytest.mark.parametrize( @@ -957,9 +962,10 @@ def test_storage_s3_put_uncompressed(started_cluster): [pytest.param("bin", "gzip", id="bin"), pytest.param("gz", "auto", id="gz")], ) def test_storage_s3_put_gzip(started_cluster, extension, method): + id = uuid.uuid4() bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] - filename = f"test_put_gzip.{extension}" + filename = f"{id}/test_put_gzip.{extension}" name = f"test_put_gzip_{extension}" data = [ "'Joseph Tomlinson',5", @@ -996,6 +1002,7 @@ def test_storage_s3_put_gzip(started_cluster, extension, method): f = gzip.GzipFile(fileobj=buf, mode="rb") uncompressed_content = f.read().decode() assert sum([int(i.split(",")[1]) for i in uncompressed_content.splitlines()]) == 708 + run_query(instance, f"DROP TABLE {name}") def test_truncate_table(started_cluster): @@ -1021,14 +1028,24 @@ def test_truncate_table(started_cluster): len(list(minio.list_objects(started_cluster.minio_bucket, "truncate/"))) == 0 ): - return + break timeout -= 1 time.sleep(1) assert len(list(minio.list_objects(started_cluster.minio_bucket, "truncate/"))) == 0 - assert instance.query("SELECT * FROM {}".format(name)) == "" + # FIXME: there was a bug in test and it was never checked. + # Currently read from truncated table fails with + # DB::Exception: Failed to get object info: No response body.. + # HTTP response code: 404: while reading truncate: While executing S3Source + # assert instance.query("SELECT * FROM {}".format(name)) == "" + instance.query(f"DROP TABLE {name} SYNC") + assert ( + instance.query(f"SELECT count() FROM system.tables where name='{name}'") + == "0\n" + ) def test_predefined_connection_configuration(started_cluster): + id = uuid.uuid4() bucket = started_cluster.minio_bucket instance = started_cluster.instances[ "dummy_without_named_collections" @@ -1056,7 +1073,9 @@ def test_predefined_connection_configuration(started_cluster): user="user", ) - instance.query(f"INSERT INTO {name} SELECT number FROM numbers(10)") + instance.query( + f"INSERT INTO {name} SELECT number FROM numbers(10) SETTINGS s3_truncate_on_insert=1" + ) result = instance.query(f"SELECT * FROM {name}") assert result == instance.query("SELECT number FROM numbers(10)") @@ -1070,9 +1089,11 @@ def test_predefined_connection_configuration(started_cluster): "To execute this query, it's necessary to have the grant NAMED COLLECTION ON no_collection" in error ) - instance = started_cluster.instances["dummy"] # has named collection access - error = instance.query_and_get_error("SELECT * FROM s3(no_collection)") + instance2 = started_cluster.instances["dummy"] # has named collection access + error = instance2.query_and_get_error("SELECT * FROM s3(no_collection)") assert "There is no named collection `no_collection`" in error + instance.query("DROP USER user") + instance.query(f"DROP TABLE {name}") result = "" @@ -1127,31 +1148,28 @@ def test_url_reconnect_in_the_middle(started_cluster): assert result == "1000000\t3914219105369203805\n" -def test_seekable_formats(started_cluster): - bucket = started_cluster.minio_bucket +# At the time of writing the actual read bytes are respectively 148 and 169, so -10% to not be flaky +@pytest.mark.parametrize( + "format_name,expected_bytes_read", [("Parquet", 133), ("ORC", 150)] +) +def test_seekable_formats(started_cluster, format_name, expected_bytes_read): + expected_lines = 1500000 instance = started_cluster.instances["dummy"] # type: ClickHouseInstance - table_function = f"s3(s3_parquet, structure='a Int32, b String', format='Parquet')" + table_function = f"s3(s3_{format_name.lower()}, structure='a Int32, b String', format='{format_name}')" exec_query_with_retry( instance, - f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(1000000) settings s3_truncate_on_insert=1", - timeout=100, + f"INSERT INTO TABLE FUNCTION {table_function} SELECT number, randomString(100) FROM numbers({expected_lines}) settings s3_truncate_on_insert=1", + timeout=300, ) result = instance.query(f"SELECT count() FROM {table_function}") - assert int(result) == 1000000 - - table_function = f"s3(s3_orc, structure='a Int32, b String', format='ORC')" - exec_query_with_retry( - instance, - f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(1500000) settings s3_truncate_on_insert=1", - timeout=100, - ) + assert int(result) == expected_lines result = instance.query( f"SELECT count() FROM {table_function} SETTINGS max_memory_usage='60M', max_download_threads=1" ) - assert int(result) == 1500000 + assert int(result) == expected_lines instance.query(f"SELECT * FROM {table_function} FORMAT Null") @@ -1162,35 +1180,31 @@ def test_seekable_formats(started_cluster): result = result.strip() assert result.endswith("MiB") result = result[: result.index(".")] - assert int(result) > 150 + assert int(result) > 140 -def test_seekable_formats_url(started_cluster): +@pytest.mark.parametrize("format_name", ["Parquet", "ORC"]) +def test_seekable_formats_url(started_cluster, format_name): bucket = started_cluster.minio_bucket + expected_lines = 1500000 instance = started_cluster.instances["dummy"] # type: ClickHouseInstance - table_function = f"s3(s3_parquet, structure='a Int32, b String', format='Parquet')" + format_name_lower = format_name.lower() + table_function = f"s3(s3_{format_name_lower}, structure='a Int32, b String', format='{format_name}')" exec_query_with_retry( instance, - f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(1500000) settings s3_truncate_on_insert=1", - timeout=100, + f"INSERT INTO TABLE FUNCTION {table_function} SELECT number, randomString(100) FROM numbers({expected_lines}) settings s3_truncate_on_insert=1", + timeout=300, ) result = instance.query(f"SELECT count() FROM {table_function}") - assert int(result) == 1500000 + assert int(result) == expected_lines - table_function = f"s3(s3_orc, structure='a Int32, b String', format='ORC')" - exec_query_with_retry( - instance, - f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(1500000) settings s3_truncate_on_insert=1", - timeout=100, - ) - - table_function = f"url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_parquet', 'Parquet', 'a Int32, b String')" + url_function = f"url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_{format_name_lower}', '{format_name}', 'a Int32, b String')" result = instance.query( - f"SELECT count() FROM {table_function} SETTINGS max_memory_usage='60M'" + f"SELECT count() FROM {url_function} SETTINGS max_memory_usage='60M'" ) - assert int(result) == 1500000 + assert int(result) == expected_lines def test_empty_file(started_cluster): @@ -1222,7 +1236,7 @@ def test_s3_schema_inference(started_cluster): instance = started_cluster.instances["dummy"] instance.query( - f"insert into table function s3(s3_native, structure='a Int32, b String', format='Native') select number, randomString(100) from numbers(5000000)" + f"insert into table function s3(s3_native, structure='a Int32, b String', format='Native') select number, randomString(100) from numbers(5000000) SETTINGS s3_truncate_on_insert=1" ) result = instance.query(f"desc s3(s3_native, format='Native')") assert result == "a\tInt32\t\t\t\t\t\nb\tString\t\t\t\t\t\n" @@ -1262,6 +1276,9 @@ def test_s3_schema_inference(started_cluster): result = instance.query(f"select count(*) from {table_function}") assert int(result) == 5000000 + instance.query("drop table schema_inference") + instance.query("drop table schema_inference_2") + def test_empty_file(started_cluster): bucket = started_cluster.minio_bucket @@ -1297,6 +1314,7 @@ def test_overwrite(started_cluster): result = instance.query(f"select count() from test_overwrite") assert int(result) == 200 + instance.query(f"drop table test_overwrite") def test_create_new_files_on_insert(started_cluster): @@ -1338,6 +1356,7 @@ def test_create_new_files_on_insert(started_cluster): result = instance.query(f"select count() from test_multiple_inserts") assert int(result) == 60 + instance.query("drop table test_multiple_inserts") def test_format_detection(started_cluster): @@ -1345,7 +1364,9 @@ def test_format_detection(started_cluster): instance = started_cluster.instances["dummy"] instance.query(f"create table arrow_table_s3 (x UInt64) engine=S3(s3_arrow)") - instance.query(f"insert into arrow_table_s3 select 1") + instance.query( + f"insert into arrow_table_s3 select 1 settings s3_truncate_on_insert=1" + ) result = instance.query(f"select * from s3(s3_arrow)") assert int(result) == 1 @@ -1360,7 +1381,9 @@ def test_format_detection(started_cluster): assert int(result) == 1 instance.query(f"create table parquet_table_s3 (x UInt64) engine=S3(s3_parquet2)") - instance.query(f"insert into parquet_table_s3 select 1") + instance.query( + f"insert into parquet_table_s3 select 1 settings s3_truncate_on_insert=1" + ) result = instance.query(f"select * from s3(s3_parquet2)") assert int(result) == 1 @@ -1373,64 +1396,67 @@ def test_format_detection(started_cluster): f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.parquet')" ) assert int(result) == 1 + instance.query(f"drop table arrow_table_s3") + instance.query(f"drop table parquet_table_s3") def test_schema_inference_from_globs(started_cluster): + id = uuid.uuid4() bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] instance.query( - f"insert into table function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test1.jsoncompacteachrow', 'JSONCompactEachRow', 'x Nullable(UInt32)') select NULL" + f"insert into table function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{id}/test1.jsoncompacteachrow', 'JSONCompactEachRow', 'x Nullable(UInt32)') select NULL" ) instance.query( - f"insert into table function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test2.jsoncompacteachrow', 'JSONCompactEachRow', 'x Nullable(UInt32)') select 0" + f"insert into table function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{id}/test2.jsoncompacteachrow', 'JSONCompactEachRow', 'x Nullable(UInt32)') select 0" ) url_filename = "test{1,2}.jsoncompacteachrow" result = instance.query( - f"desc url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{url_filename}') settings input_format_json_infer_incomplete_types_as_strings=0" + f"desc url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{id}/{url_filename}') settings input_format_json_infer_incomplete_types_as_strings=0" ) assert result.strip() == "c1\tNullable(Int64)" result = instance.query( - f"select * from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{url_filename}') settings input_format_json_infer_incomplete_types_as_strings=0" + f"select * from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{id}/{url_filename}') settings input_format_json_infer_incomplete_types_as_strings=0" ) assert sorted(result.split()) == ["0", "\\N"] result = instance.query( - f"desc s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test*.jsoncompacteachrow') settings input_format_json_infer_incomplete_types_as_strings=0" + f"desc s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{id}/test*.jsoncompacteachrow') settings input_format_json_infer_incomplete_types_as_strings=0" ) assert result.strip() == "c1\tNullable(Int64)" result = instance.query( - f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test*.jsoncompacteachrow') settings input_format_json_infer_incomplete_types_as_strings=0" + f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{id}/test*.jsoncompacteachrow') settings input_format_json_infer_incomplete_types_as_strings=0" ) assert sorted(result.split()) == ["0", "\\N"] instance.query( - f"insert into table function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test3.jsoncompacteachrow', 'JSONCompactEachRow', 'x Nullable(UInt32)') select NULL" + f"insert into table function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{id}/test3.jsoncompacteachrow', 'JSONCompactEachRow', 'x Nullable(UInt32)') select NULL" ) url_filename = "test{1,3}.jsoncompacteachrow" result = instance.query_and_get_error( - f"desc s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{url_filename}') settings schema_inference_use_cache_for_s3=0, input_format_json_infer_incomplete_types_as_strings=0" + f"desc s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{id}/{url_filename}') settings schema_inference_use_cache_for_s3=0, input_format_json_infer_incomplete_types_as_strings=0" ) assert "All attempts to extract table structure from files failed" in result result = instance.query_and_get_error( - f"desc url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{url_filename}') settings schema_inference_use_cache_for_url=0, input_format_json_infer_incomplete_types_as_strings=0" + f"desc url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{id}/{url_filename}') settings schema_inference_use_cache_for_url=0, input_format_json_infer_incomplete_types_as_strings=0" ) assert "All attempts to extract table structure from files failed" in result instance.query( - f"insert into table function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test0.jsoncompacteachrow', 'TSV', 'x String') select '[123;]'" + f"insert into table function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{id}/test0.jsoncompacteachrow', 'TSV', 'x String') select '[123;]'" ) result = instance.query_and_get_error( - f"desc s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test*.jsoncompacteachrow') settings schema_inference_use_cache_for_s3=0, input_format_json_infer_incomplete_types_as_strings=0" + f"desc s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{id}/test*.jsoncompacteachrow') settings schema_inference_use_cache_for_s3=0, input_format_json_infer_incomplete_types_as_strings=0" ) assert "CANNOT_EXTRACT_TABLE_STRUCTURE" in result @@ -1438,7 +1464,7 @@ def test_schema_inference_from_globs(started_cluster): url_filename = "test{0,1,2,3}.jsoncompacteachrow" result = instance.query_and_get_error( - f"desc url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{url_filename}') settings schema_inference_use_cache_for_url=0, input_format_json_infer_incomplete_types_as_strings=0" + f"desc url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{id}/{url_filename}') settings schema_inference_use_cache_for_url=0, input_format_json_infer_incomplete_types_as_strings=0" ) assert "CANNOT_EXTRACT_TABLE_STRUCTURE" in result @@ -1498,9 +1524,12 @@ def test_signatures(started_cluster): ) assert "S3_ERROR" in error + instance.query(f"drop table test_signatures") + def test_select_columns(started_cluster): bucket = started_cluster.minio_bucket + id = uuid.uuid4() instance = started_cluster.instances["dummy"] name = "test_table2" structure = "id UInt32, value1 Int32, value2 Int32" @@ -1514,36 +1543,37 @@ def test_select_columns(started_cluster): instance.query( f"INSERT INTO {name} SELECT * FROM generateRandom('{structure}') LIMIT {limit} SETTINGS s3_truncate_on_insert=1" ) - instance.query(f"SELECT value2 FROM {name}") + instance.query(f"SELECT value2, '{id}' FROM {name}") instance.query("SYSTEM FLUSH LOGS") result1 = instance.query( - f"SELECT ProfileEvents['ReadBufferFromS3Bytes'] FROM system.query_log WHERE type='QueryFinish' and query LIKE 'SELECT value2 FROM {name}'" + f"SELECT ProfileEvents['ReadBufferFromS3Bytes'] FROM system.query_log WHERE type='QueryFinish' and query LIKE 'SELECT value2, ''{id}'' FROM {name}'" ) - instance.query(f"SELECT * FROM {name}") + instance.query(f"SELECT *, '{id}' FROM {name}") instance.query("SYSTEM FLUSH LOGS") result2 = instance.query( - f"SELECT ProfileEvents['ReadBufferFromS3Bytes'] FROM system.query_log WHERE type='QueryFinish' and query LIKE 'SELECT * FROM {name}'" + f"SELECT ProfileEvents['ReadBufferFromS3Bytes'] FROM system.query_log WHERE type='QueryFinish' and query LIKE 'SELECT *, ''{id}'' FROM {name}'" ) assert round(int(result2) / int(result1)) == 3 def test_insert_select_schema_inference(started_cluster): + id = uuid.uuid4() bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] instance.query( - f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_insert_select.native') select toUInt64(1) as x" + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{id}/test_insert_select.native') select toUInt64(1) as x" ) result = instance.query( - f"desc s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_insert_select.native')" + f"desc s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{id}/test_insert_select.native')" ) assert result.strip() == "x\tUInt64" result = instance.query( - f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_insert_select.native')" + f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{id}/test_insert_select.native')" ) assert int(result) == 1 @@ -1553,7 +1583,7 @@ def test_parallel_reading_with_memory_limit(started_cluster): instance = started_cluster.instances["dummy"] instance.query( - f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_memory_limit.native') select * from numbers(1000000)" + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_memory_limit.native') select * from numbers(1000000) SETTINGS s3_truncate_on_insert=1" ) result = instance.query_and_get_error( @@ -1574,7 +1604,7 @@ def test_wrong_format_usage(started_cluster): instance = started_cluster.instances["dummy"] instance.query( - f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_wrong_format.native') select * from numbers(10e6)" + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_wrong_format.native') select * from numbers(10e6) SETTINGS s3_truncate_on_insert=1" ) # size(test_wrong_format.native) = 10e6*8+16(header) ~= 76MiB @@ -2097,11 +2127,11 @@ def test_read_subcolumns(started_cluster): instance = started_cluster.instances["dummy"] instance.query( - f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3) SETTINGS s3_truncate_on_insert=1" ) instance.query( - f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3) SETTINGS s3_truncate_on_insert=1" ) res = instance.query( @@ -2160,7 +2190,7 @@ def test_read_subcolumn_time(started_cluster): instance = started_cluster.instances["dummy"] instance.query( - f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumn_time.tsv', auto, 'a UInt32') select (42)" + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumn_time.tsv', auto, 'a UInt32') select (42) SETTINGS s3_truncate_on_insert=1" ) res = instance.query( @@ -2171,29 +2201,30 @@ def test_read_subcolumn_time(started_cluster): def test_filtering_by_file_or_path(started_cluster): + id = uuid.uuid4() bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] instance.query( - f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_filter1.tsv', auto, 'x UInt64') select 1" + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_filter1.tsv', auto, 'x UInt64') select 1 SETTINGS s3_truncate_on_insert=1" ) instance.query( - f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_filter2.tsv', auto, 'x UInt64') select 2" + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_filter2.tsv', auto, 'x UInt64') select 2 SETTINGS s3_truncate_on_insert=1" ) instance.query( - f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_filter3.tsv', auto, 'x UInt64') select 3" + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_filter3.tsv', auto, 'x UInt64') select 3 SETTINGS s3_truncate_on_insert=1" ) instance.query( - f"select count() from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_filter*.tsv') where _file = 'test_filter1.tsv'" + f"select count(), '{id}' from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_filter*.tsv') where _file = 'test_filter1.tsv'" ) instance.query("SYSTEM FLUSH LOGS") result = instance.query( - f"SELECT ProfileEvents['EngineFileLikeReadFiles'] FROM system.query_log WHERE query like '%select%s3%test_filter%' AND type='QueryFinish'" + f"SELECT ProfileEvents['EngineFileLikeReadFiles'] FROM system.query_log WHERE query like '%{id}%' AND type='QueryFinish'" ) assert int(result) == 1 @@ -2206,54 +2237,56 @@ def test_filtering_by_file_or_path(started_cluster): def test_union_schema_inference_mode(started_cluster): + id = uuid.uuid4() bucket = started_cluster.minio_bucket instance = started_cluster.instances["s3_non_default"] + file_name_prefix = f"test_union_schema_inference_{id}_" instance.query( - f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_union_schema_inference1.jsonl') select 1 as a" + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{file_name_prefix}1.jsonl') select 1 as a SETTINGS s3_truncate_on_insert=1" ) instance.query( - f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_union_schema_inference2.jsonl') select 2 as b" + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{file_name_prefix}2.jsonl') select 2 as b SETTINGS s3_truncate_on_insert=1" ) instance.query( - f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_union_schema_inference3.jsonl') select 2 as c" + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{file_name_prefix}3.jsonl') select 2 as c SETTINGS s3_truncate_on_insert=1" ) instance.query( - f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_union_schema_inference4.jsonl', TSV) select 'Error'" + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{file_name_prefix}4.jsonl', TSV) select 'Error' SETTINGS s3_truncate_on_insert=1" ) for engine in ["s3", "url"]: instance.query("system drop schema cache for s3") result = instance.query( - f"desc {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_union_schema_inference{{1,2,3}}.jsonl') settings schema_inference_mode='union', describe_compact_output=1 format TSV" + f"desc {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{file_name_prefix}{{1,2,3}}.jsonl') settings schema_inference_mode='union', describe_compact_output=1 format TSV" ) assert result == "a\tNullable(Int64)\nb\tNullable(Int64)\nc\tNullable(Int64)\n" result = instance.query( - "select schema_inference_mode, splitByChar('/', source)[-1] as file, schema from system.schema_inference_cache where source like '%test_union_schema_inference%' order by file format TSV" + f"select schema_inference_mode, splitByChar('/', source)[-1] as file, schema from system.schema_inference_cache where source like '%{file_name_prefix}%' order by file format TSV" ) assert ( - result == "UNION\ttest_union_schema_inference1.jsonl\ta Nullable(Int64)\n" - "UNION\ttest_union_schema_inference2.jsonl\tb Nullable(Int64)\n" - "UNION\ttest_union_schema_inference3.jsonl\tc Nullable(Int64)\n" + result == f"UNION\t{file_name_prefix}1.jsonl\ta Nullable(Int64)\n" + f"UNION\t{file_name_prefix}2.jsonl\tb Nullable(Int64)\n" + f"UNION\t{file_name_prefix}3.jsonl\tc Nullable(Int64)\n" ) result = instance.query( - f"select * from {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_union_schema_inference{{1,2,3}}.jsonl') order by tuple(*) settings schema_inference_mode='union', describe_compact_output=1 format TSV" + f"select * from {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{file_name_prefix}{{1,2,3}}.jsonl') order by tuple(*) settings schema_inference_mode='union', describe_compact_output=1 format TSV" ) assert result == "1\t\\N\t\\N\n" "\\N\t2\t\\N\n" "\\N\t\\N\t2\n" instance.query(f"system drop schema cache for {engine}") result = instance.query( - f"desc {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_union_schema_inference2.jsonl') settings schema_inference_mode='union', describe_compact_output=1 format TSV" + f"desc {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{file_name_prefix}2.jsonl') settings schema_inference_mode='union', describe_compact_output=1 format TSV" ) assert result == "b\tNullable(Int64)\n" result = instance.query( - f"desc {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_union_schema_inference{{1,2,3}}.jsonl') settings schema_inference_mode='union', describe_compact_output=1 format TSV" + f"desc {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{file_name_prefix}{{1,2,3}}.jsonl') settings schema_inference_mode='union', describe_compact_output=1 format TSV" ) assert ( result == "a\tNullable(Int64)\n" @@ -2262,7 +2295,7 @@ def test_union_schema_inference_mode(started_cluster): ) error = instance.query_and_get_error( - f"desc {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_union_schema_inference{{1,2,3,4}}.jsonl') settings schema_inference_mode='union', describe_compact_output=1 format TSV" + f"desc {engine}('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{file_name_prefix}{{1,2,3,4}}.jsonl') settings schema_inference_mode='union', describe_compact_output=1 format TSV" ) assert "CANNOT_EXTRACT_TABLE_STRUCTURE" in error diff --git a/tests/integration/test_table_db_num_limit/test.py b/tests/integration/test_table_db_num_limit/test.py index aa8030b077c..b7f9d7c0b96 100644 --- a/tests/integration/test_table_db_num_limit/test.py +++ b/tests/integration/test_table_db_num_limit/test.py @@ -4,9 +4,7 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance( - "node1", main_configs=["config/config.xml"], with_zookeeper=True -) +node = cluster.add_instance("node", main_configs=["config/config.xml"]) @pytest.fixture(scope="module") @@ -21,23 +19,28 @@ def started_cluster(): def test_table_db_limit(started_cluster): - for i in range(10): - node1.query("create database db{}".format(i)) + # By the way, default database already exists. + for i in range(9): + node.query("create database db{}".format(i)) with pytest.raises(QueryRuntimeException) as exp_info: - node1.query("create database db_exp".format(i)) + node.query("create database db_exp".format(i)) assert "TOO_MANY_DATABASES" in str(exp_info) for i in range(10): - node1.query("create table t{} (a Int32) Engine = Log".format(i)) + node.query("create table t{} (a Int32) Engine = Log".format(i)) + + # This checks that system tables are not accounted in the number of tables. + node.query("system flush logs") - node1.query("system flush logs") for i in range(10): - node1.query("drop table t{}".format(i)) + node.query("drop table t{}".format(i)) + for i in range(10): - node1.query("create table t{} (a Int32) Engine = Log".format(i)) + node.query("create table t{} (a Int32) Engine = Log".format(i)) with pytest.raises(QueryRuntimeException) as exp_info: - node1.query("create table default.tx (a Int32) Engine = Log") + node.query("create table default.tx (a Int32) Engine = Log") + assert "TOO_MANY_TABLES" in str(exp_info) diff --git a/tests/performance/decimal_aggregates.xml b/tests/performance/decimal_aggregates.xml index ec88be0124f..724d0c5d0e6 100644 --- a/tests/performance/decimal_aggregates.xml +++ b/tests/performance/decimal_aggregates.xml @@ -4,8 +4,13 @@ CREATE TABLE t (x UInt64, d32 Decimal32(3), d64 Decimal64(4), d128 Decimal128(5)) ENGINE = Memory - - INSERT INTO t SELECT number AS x, x % 1000000 AS d32, x AS d64, x d128 FROM numbers_mt(500000000) SETTINGS max_threads = 8 + + INSERT INTO t SELECT number AS x, x % 1000000 AS d32, x AS d64, x d128 FROM numbers_mt(100000000) SETTINGS max_threads = 2 + INSERT INTO t SELECT number AS x, x % 1000000 AS d32, x AS d64, x d128 FROM numbers_mt(100000000, 100000000) SETTINGS max_threads = 2 + INSERT INTO t SELECT number AS x, x % 1000000 AS d32, x AS d64, x d128 FROM numbers_mt(200000000, 100000000) SETTINGS max_threads = 2 + INSERT INTO t SELECT number AS x, x % 1000000 AS d32, x AS d64, x d128 FROM numbers_mt(300000000, 100000000) SETTINGS max_threads = 2 + INSERT INTO t SELECT number AS x, x % 1000000 AS d32, x AS d64, x d128 FROM numbers_mt(400000000, 100000000) SETTINGS max_threads = 2 + DROP TABLE IF EXISTS t SELECT min(d32), max(d32), argMin(x, d32), argMax(x, d32) FROM t diff --git a/tests/performance/generate_table_function.xml b/tests/performance/generate_table_function.xml index c219d73b6cf..d56c585188d 100644 --- a/tests/performance/generate_table_function.xml +++ b/tests/performance/generate_table_function.xml @@ -7,7 +7,7 @@ SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Asia/Istanbul\')', 0, 10, 10) LIMIT 1000000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Asia/Istanbul\')', 0, 10, 10) LIMIT 100000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('f32 Float32, f64 Float64', 0, 10, 10) LIMIT 1000000000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 0, 10, 10) LIMIT 1000000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 0, 10, 10) LIMIT 100000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Int64)', 0, 10, 10) LIMIT 1000000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Int8)', 0, 10, 10) LIMIT 100000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Int32))', 0, 10, 10) LIMIT 100000000); diff --git a/tests/queries/0_stateless/00186_very_long_arrays.sh b/tests/queries/0_stateless/00186_very_long_arrays.sh index 739b17ccc99..086303279fa 100755 --- a/tests/queries/0_stateless/00186_very_long_arrays.sh +++ b/tests/queries/0_stateless/00186_very_long_arrays.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long +# Tags: long, no-tsan CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/00257_shard_no_aggregates_and_constant_keys.reference b/tests/queries/0_stateless/00257_shard_no_aggregates_and_constant_keys.reference index fc77ed8a241..63b8a9d14fc 100644 --- a/tests/queries/0_stateless/00257_shard_no_aggregates_and_constant_keys.reference +++ b/tests/queries/0_stateless/00257_shard_no_aggregates_and_constant_keys.reference @@ -8,13 +8,13 @@ 40 41 -41 +0 2 42 2 42 43 -43 +0 11 11 diff --git a/tests/queries/0_stateless/00366_multi_statements.reference b/tests/queries/0_stateless/00366_multi_statements.reference index 68f9261b937..71ca552522c 100644 --- a/tests/queries/0_stateless/00366_multi_statements.reference +++ b/tests/queries/0_stateless/00366_multi_statements.reference @@ -3,8 +3,10 @@ 1 1 Syntax error -Syntax error -Syntax error +1 +2 +1 +2 Syntax error Syntax error 1 diff --git a/tests/queries/0_stateless/00366_multi_statements.sh b/tests/queries/0_stateless/00366_multi_statements.sh index 9b885bb1b32..0b2e80fe457 100755 --- a/tests/queries/0_stateless/00366_multi_statements.sh +++ b/tests/queries/0_stateless/00366_multi_statements.sh @@ -10,8 +10,8 @@ $CLICKHOUSE_CLIENT --query="SELECT 1; " $CLICKHOUSE_CLIENT --query="SELECT 1 ; " $CLICKHOUSE_CLIENT --query="SELECT 1; S" 2>&1 | grep -o 'Syntax error' -$CLICKHOUSE_CLIENT --query="SELECT 1; SELECT 2" 2>&1 | grep -o 'Syntax error' -$CLICKHOUSE_CLIENT --query="SELECT 1; SELECT 2;" 2>&1 | grep -o 'Syntax error' +$CLICKHOUSE_CLIENT --query="SELECT 1; SELECT 2" +$CLICKHOUSE_CLIENT --query="SELECT 1; SELECT 2;" $CLICKHOUSE_CLIENT --query="SELECT 1; SELECT 2; SELECT" 2>&1 | grep -o 'Syntax error' $CLICKHOUSE_CLIENT -n --query="SELECT 1; S" 2>&1 | grep -o 'Syntax error' diff --git a/tests/queries/0_stateless/00499_json_enum_insert.sql b/tests/queries/0_stateless/00499_json_enum_insert.sql index c4988d08243..24eab38c65f 100644 --- a/tests/queries/0_stateless/00499_json_enum_insert.sql +++ b/tests/queries/0_stateless/00499_json_enum_insert.sql @@ -4,12 +4,15 @@ DROP TABLE IF EXISTS json; CREATE TABLE json (x Enum8('browser' = 1, 'mobile' = 2), y String) ENGINE = Memory; INSERT INTO json (y) VALUES ('Hello'); + SELECT * FROM json ORDER BY y; INSERT INTO json (y) FORMAT JSONEachRow {"y": "World 1"}; + SELECT * FROM json ORDER BY y; INSERT INTO json (x, y) FORMAT JSONEachRow {"y": "World 2"}; + SELECT * FROM json ORDER BY y; DROP TABLE json; diff --git a/tests/queries/0_stateless/00517_date_parsing.sql b/tests/queries/0_stateless/00517_date_parsing.sql index 9067d39ba3e..4cfede7cb2b 100644 --- a/tests/queries/0_stateless/00517_date_parsing.sql +++ b/tests/queries/0_stateless/00517_date_parsing.sql @@ -7,6 +7,7 @@ INSERT INTO date VALUES ('2017-01-02'), ('2017-1-02'), ('2017-01-2'), ('2017-1-2 SELECT * FROM date; INSERT INTO date FORMAT JSONEachRow {"d": "2017-01-02"}, {"d": "2017-1-02"}, {"d": "2017-01-2"}, {"d": "2017-1-2"}, {"d": "2017/01/02"}, {"d": "2017/1/02"}, {"d": "2017/01/2"}, {"d": "2017/1/2"}, {"d": "2017-11-12"}; + SELECT * FROM date ORDER BY d; DROP TABLE date; diff --git a/tests/queries/0_stateless/00560_float_leading_plus_in_exponent.sql b/tests/queries/0_stateless/00560_float_leading_plus_in_exponent.sql index 33e010cdc9c..d2d62510e7b 100644 --- a/tests/queries/0_stateless/00560_float_leading_plus_in_exponent.sql +++ b/tests/queries/0_stateless/00560_float_leading_plus_in_exponent.sql @@ -1,3 +1,5 @@ CREATE TEMPORARY TABLE test_float (x Float64); + INSERT INTO test_float FORMAT TabSeparated 1.075e+06 + SELECT * FROM test_float; diff --git a/tests/queries/0_stateless/00673_subquery_prepared_set_performance.sql b/tests/queries/0_stateless/00673_subquery_prepared_set_performance.sql index 0591592344c..98c0802ffbc 100644 --- a/tests/queries/0_stateless/00673_subquery_prepared_set_performance.sql +++ b/tests/queries/0_stateless/00673_subquery_prepared_set_performance.sql @@ -3,10 +3,10 @@ DROP TABLE IF EXISTS mergetree_00673; CREATE TABLE mergetree_00673 (x UInt64) ENGINE = MergeTree ORDER BY x; INSERT INTO mergetree_00673 VALUES (1); -SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM mergetree_00673 WHERE x IN (SELECT * FROM numbers(10000000)))))))))))); +SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM mergetree_00673 WHERE x IN (SELECT * FROM numbers(1000000)))))))))))))))))))))); SET force_primary_key = 1; -SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM mergetree_00673 WHERE x IN (SELECT * FROM numbers(10000000)))))))))))); +SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT * FROM mergetree_00673 WHERE x IN (SELECT * FROM numbers(1000000)))))))))))))))))))))); DROP TABLE mergetree_00673; diff --git a/tests/queries/0_stateless/00688_low_cardinality_dictionary_deserialization.sql b/tests/queries/0_stateless/00688_low_cardinality_dictionary_deserialization.sql index c4613acf5f3..d359efd8d42 100644 --- a/tests/queries/0_stateless/00688_low_cardinality_dictionary_deserialization.sql +++ b/tests/queries/0_stateless/00688_low_cardinality_dictionary_deserialization.sql @@ -1,5 +1,5 @@ drop table if exists lc_dict_reading; -create table lc_dict_reading (val UInt64, str StringWithDictionary, pat String) engine = MergeTree order by val SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +create table lc_dict_reading (val UInt64, str LowCardinality(String), pat String) engine = MergeTree order by val SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into lc_dict_reading select number, if(number < 8192 * 4, number % 100, number) as s, s from system.numbers limit 1000000; select sum(toUInt64(str)), sum(toUInt64(pat)) from lc_dict_reading where val < 8129 or val > 8192 * 4; drop table if exists lc_dict_reading; diff --git a/tests/queries/0_stateless/00688_low_cardinality_in.sql b/tests/queries/0_stateless/00688_low_cardinality_in.sql index cb57fad51a4..c39fdb37160 100644 --- a/tests/queries/0_stateless/00688_low_cardinality_in.sql +++ b/tests/queries/0_stateless/00688_low_cardinality_in.sql @@ -1,6 +1,6 @@ set allow_suspicious_low_cardinality_types = 1; drop table if exists lc_00688; -create table lc_00688 (str StringWithDictionary, val UInt8WithDictionary) engine = MergeTree order by tuple(); +create table lc_00688 (str LowCardinality(String), val LowCardinality(UInt8)) engine = MergeTree order by tuple(); insert into lc_00688 values ('a', 1), ('b', 2); select str, str in ('a', 'd') from lc_00688; select val, val in (1, 3) from lc_00688; diff --git a/tests/queries/0_stateless/00688_low_cardinality_prewhere.sql b/tests/queries/0_stateless/00688_low_cardinality_prewhere.sql index a15b2540fe6..17c74b7ca05 100644 --- a/tests/queries/0_stateless/00688_low_cardinality_prewhere.sql +++ b/tests/queries/0_stateless/00688_low_cardinality_prewhere.sql @@ -1,5 +1,5 @@ drop table if exists lc_prewhere; -create table lc_prewhere (key UInt64, val UInt64, str StringWithDictionary, s String) engine = MergeTree order by key settings index_granularity = 8192; +create table lc_prewhere (key UInt64, val UInt64, str LowCardinality(String), s String) engine = MergeTree order by key settings index_granularity = 8192; insert into lc_prewhere select number, if(number < 10 or number > 8192 * 9, 1, 0), toString(number) as s, s from system.numbers limit 100000; select sum(toUInt64(str)), sum(toUInt64(s)) from lc_prewhere prewhere val == 1; drop table if exists lc_prewhere; diff --git a/tests/queries/0_stateless/00688_low_cardinality_serialization.sql b/tests/queries/0_stateless/00688_low_cardinality_serialization.sql index b4fe4b29200..1e4de3f3d3e 100644 --- a/tests/queries/0_stateless/00688_low_cardinality_serialization.sql +++ b/tests/queries/0_stateless/00688_low_cardinality_serialization.sql @@ -8,8 +8,8 @@ select 'MergeTree'; drop table if exists lc_small_dict; drop table if exists lc_big_dict; -create table lc_small_dict (str StringWithDictionary) engine = MergeTree order by str SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -create table lc_big_dict (str StringWithDictionary) engine = MergeTree order by str SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +create table lc_small_dict (str LowCardinality(String)) engine = MergeTree order by str SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +create table lc_big_dict (str LowCardinality(String)) engine = MergeTree order by str SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into lc_small_dict select toString(number % 1000) from system.numbers limit 1000000; insert into lc_big_dict select toString(number) from system.numbers limit 1000000; diff --git a/tests/queries/0_stateless/00688_low_cardinality_syntax.reference b/tests/queries/0_stateless/00688_low_cardinality_syntax.reference index ca27069a7df..b06beb0d6f0 100644 --- a/tests/queries/0_stateless/00688_low_cardinality_syntax.reference +++ b/tests/queries/0_stateless/00688_low_cardinality_syntax.reference @@ -1,13 +1,7 @@ a a -a -a 1 1 -1 -1 -ab -ab ab ab - diff --git a/tests/queries/0_stateless/00688_low_cardinality_syntax.sql b/tests/queries/0_stateless/00688_low_cardinality_syntax.sql index a11d9e2d9fe..dccdac1d95d 100644 --- a/tests/queries/0_stateless/00688_low_cardinality_syntax.sql +++ b/tests/queries/0_stateless/00688_low_cardinality_syntax.sql @@ -13,56 +13,32 @@ drop table if exists lc_null_fix_str_0; drop table if exists lc_null_fix_str_1; create table lc_str_0 (str LowCardinality(String)) engine = Memory; -create table lc_str_1 (str StringWithDictionary) engine = Memory; create table lc_null_str_0 (str LowCardinality(Nullable(String))) engine = Memory; -create table lc_null_str_1 (str NullableWithDictionary(String)) engine = Memory; create table lc_int8_0 (val LowCardinality(Int8)) engine = Memory; -create table lc_int8_1 (val Int8WithDictionary) engine = Memory; create table lc_null_int8_0 (val LowCardinality(Nullable(Int8))) engine = Memory; -create table lc_null_int8_1 (val NullableWithDictionary(Int8)) engine = Memory; create table lc_fix_str_0 (str LowCardinality(FixedString(2))) engine = Memory; -create table lc_fix_str_1 (str FixedStringWithDictionary(2)) engine = Memory; create table lc_null_fix_str_0 (str LowCardinality(Nullable(FixedString(2)))) engine = Memory; -create table lc_null_fix_str_1 (str NullableWithDictionary(FixedString(2))) engine = Memory; insert into lc_str_0 select 'a'; -insert into lc_str_1 select 'a'; insert into lc_null_str_0 select 'a'; -insert into lc_null_str_1 select 'a'; insert into lc_int8_0 select 1; -insert into lc_int8_1 select 1; insert into lc_null_int8_0 select 1; -insert into lc_null_int8_1 select 1; insert into lc_fix_str_0 select 'ab'; -insert into lc_fix_str_1 select 'ab'; insert into lc_null_fix_str_0 select 'ab'; -insert into lc_null_fix_str_1 select 'ab'; select str from lc_str_0; -select str from lc_str_1; select str from lc_null_str_0; -select str from lc_null_str_1; select val from lc_int8_0; -select val from lc_int8_1; select val from lc_null_int8_0; -select val from lc_null_int8_1; select str from lc_fix_str_0; -select str from lc_fix_str_1; select str from lc_null_fix_str_0; -select str from lc_null_fix_str_1; drop table if exists lc_str_0; -drop table if exists lc_str_1; drop table if exists lc_null_str_0; -drop table if exists lc_null_str_1; drop table if exists lc_int8_0; -drop table if exists lc_int8_1; drop table if exists lc_null_int8_0; -drop table if exists lc_null_int8_1; drop table if exists lc_fix_str_0; -drop table if exists lc_fix_str_1; drop table if exists lc_null_fix_str_0; -drop table if exists lc_null_fix_str_1; select '-'; SELECT toLowCardinality('a') AS s, toTypeName(s), toTypeName(length(s)) from system.one; @@ -73,7 +49,7 @@ select (toLowCardinality(z) as val) || 'b' from (select arrayJoin(['c', 'd']) a select '-'; drop table if exists lc_str_uuid; -create table lc_str_uuid(str1 String, str2 LowCardinality(String), str3 StringWithDictionary) ENGINE=Memory; +create table lc_str_uuid(str1 String, str2 LowCardinality(String), str3 LowCardinality(String)) ENGINE=Memory; select toUUID(str1), toUUID(str2), toUUID(str3) from lc_str_uuid; select toUUID(str1, '', NULL), toUUID(str2, '', NULL), toUUID(str3, '', NULL) from lc_str_uuid; insert into lc_str_uuid values ('61f0c404-5cb3-11e7-907b-a6006ad3dba0', '61f0c404-5cb3-11e7-907b-a6006ad3dba0', '61f0c404-5cb3-11e7-907b-a6006ad3dba0'); diff --git a/tests/queries/0_stateless/00700_decimal_formats.sql b/tests/queries/0_stateless/00700_decimal_formats.sql index 45b2a5f1078..f008897bf5d 100644 --- a/tests/queries/0_stateless/00700_decimal_formats.sql +++ b/tests/queries/0_stateless/00700_decimal_formats.sql @@ -9,22 +9,22 @@ CREATE TABLE IF NOT EXISTS decimal INSERT INTO decimal (a, b, c) VALUES (42.0, -42.0, 42) (0.42, -0.42, .42) (42.42, -42.42, 42.42); INSERT INTO decimal (a, b, c) FORMAT JSONEachRow {"a":1.1, "b":-1.1, "c":1.1} {"a":1.0, "b":-1.0, "c":1} {"a":0.1, "b":-0.1, "c":.1}; + INSERT INTO decimal (a, b, c) FORMAT CSV 2.0,-2.0,2 -; + INSERT INTO decimal (a, b, c) FORMAT CSV 0.2 ,-0.2 ,.2 -; + INSERT INTO decimal (a, b, c) FORMAT CSV 2.2 , -2.2 , 2.2 -; + INSERT INTO decimal (a, b, c) FORMAT TabSeparated 3.3 -3.3 3.3 -; + INSERT INTO decimal (a, b, c) FORMAT TabSeparated 3.0 -3.0 3 -; + INSERT INTO decimal (a, b, c) FORMAT TabSeparated 0.3 -0.3 .3 -; + INSERT INTO decimal (a, b, c) FORMAT CSV 4.4E+5,-4E+8,.4E+20 -; + INSERT INTO decimal (a, b, c) FORMAT CSV 5.5e-2, -5e-9 ,.5e-17 -; SELECT * FROM decimal ORDER BY a FORMAT JSONEachRow; SELECT * FROM decimal ORDER BY b DESC FORMAT CSV; diff --git a/tests/queries/0_stateless/00705_drop_create_merge_tree.sh b/tests/queries/0_stateless/00705_drop_create_merge_tree.sh index d7754091290..ea8b9d02e49 100755 --- a/tests/queries/0_stateless/00705_drop_create_merge_tree.sh +++ b/tests/queries/0_stateless/00705_drop_create_merge_tree.sh @@ -5,8 +5,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -yes 'CREATE TABLE IF NOT EXISTS table (x UInt8) ENGINE = MergeTree ORDER BY tuple();' | head -n 1000 | $CLICKHOUSE_CLIENT --ignore-error -nm 2>/dev/null & -yes 'DROP TABLE table;' | head -n 1000 | $CLICKHOUSE_CLIENT --ignore-error -nm 2>/dev/null & +yes 'CREATE TABLE IF NOT EXISTS table (x UInt8) ENGINE = MergeTree ORDER BY tuple();' | head -n 1000 | $CLICKHOUSE_CLIENT --multiquery & +yes 'DROP TABLE IF EXISTS table;' | head -n 1000 | $CLICKHOUSE_CLIENT --multiquery & wait ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS table" diff --git a/tests/queries/0_stateless/00717_low_cardinaliry_group_by.sql b/tests/queries/0_stateless/00717_low_cardinaliry_group_by.sql index 02915d4e611..3115ab508fe 100644 --- a/tests/queries/0_stateless/00717_low_cardinaliry_group_by.sql +++ b/tests/queries/0_stateless/00717_low_cardinaliry_group_by.sql @@ -1,5 +1,5 @@ drop table if exists tab_00717; -create table tab_00717 (a String, b StringWithDictionary) engine = MergeTree order by a; +create table tab_00717 (a String, b LowCardinality(String)) engine = MergeTree order by a; insert into tab_00717 values ('a_1', 'b_1'), ('a_2', 'b_2'); select count() from tab_00717; select a from tab_00717 group by a order by a; diff --git a/tests/queries/0_stateless/00718_low_cardinaliry_alter.sql b/tests/queries/0_stateless/00718_low_cardinaliry_alter.sql index 591ff952132..524e396bcb1 100644 --- a/tests/queries/0_stateless/00718_low_cardinaliry_alter.sql +++ b/tests/queries/0_stateless/00718_low_cardinaliry_alter.sql @@ -7,7 +7,7 @@ alter table tab_00718 modify column b UInt32; select *, toTypeName(b) from tab_00718; alter table tab_00718 modify column b LowCardinality(UInt32); select *, toTypeName(b) from tab_00718; -alter table tab_00718 modify column b StringWithDictionary; +alter table tab_00718 modify column b LowCardinality(String); select *, toTypeName(b) from tab_00718; alter table tab_00718 modify column b LowCardinality(UInt32); select *, toTypeName(b) from tab_00718; diff --git a/tests/queries/0_stateless/00752_low_cardinality_mv_1.sql b/tests/queries/0_stateless/00752_low_cardinality_mv_1.sql index 60cc30ec2c3..4fdd0a2496e 100644 --- a/tests/queries/0_stateless/00752_low_cardinality_mv_1.sql +++ b/tests/queries/0_stateless/00752_low_cardinality_mv_1.sql @@ -1,7 +1,7 @@ drop table if exists lc_00752; drop table if exists lc_mv_00752; -create table lc_00752 (str StringWithDictionary) engine = MergeTree order by tuple(); +create table lc_00752 (str LowCardinality(String)) engine = MergeTree order by tuple(); insert into lc_00752 values ('a'), ('bbb'), ('ab'), ('accccc'), ('baasddas'), ('bcde'); @@ -12,4 +12,3 @@ select * from lc_mv_00752 order by letter; drop table if exists lc_00752; drop table if exists lc_mv_00752; - diff --git a/tests/queries/0_stateless/00760_insert_json_with_defaults.sql b/tests/queries/0_stateless/00760_insert_json_with_defaults.sql index 1430d10cdb0..54987258375 100644 --- a/tests/queries/0_stateless/00760_insert_json_with_defaults.sql +++ b/tests/queries/0_stateless/00760_insert_json_with_defaults.sql @@ -15,20 +15,29 @@ CREATE TABLE defaults ) ENGINE = MergeTree ORDER BY x; INSERT INTO defaults FORMAT JSONEachRow {"x":1, "y":1}; + INSERT INTO defaults (x, y) SELECT x, y FROM defaults LIMIT 1; + INSERT INTO defaults FORMAT JSONEachRow {"x":2, "y":2, "c":2}; + INSERT INTO defaults FORMAT JSONEachRow {"x":3, "y":3, "a":3, "b":3, "c":3}; + INSERT INTO defaults FORMAT JSONEachRow {"x":4} {"y":5, "c":5} {"a":6, "b":6, "c":6}; SELECT * FROM defaults ORDER BY (x, y); ALTER TABLE defaults ADD COLUMN n Nested(a UInt64, b String); + INSERT INTO defaults FORMAT JSONEachRow {"x":7, "y":7, "n.a":[1,2], "n.b":["a","b"]}; + SELECT * FROM defaults WHERE x = 7 FORMAT JSONEachRow; ALTER TABLE defaults ADD COLUMN n.c Array(UInt8) DEFAULT arrayMap(x -> 0, n.a) AFTER n.a; + INSERT INTO defaults FORMAT JSONEachRow {"x":8, "y":8, "n.a":[3,4], "n.b":["c","d"]}; + INSERT INTO defaults FORMAT JSONEachRow {"x":9, "y":9}; + SELECT * FROM defaults WHERE x > 7 ORDER BY x FORMAT JSONEachRow; DROP TABLE defaults; diff --git a/tests/queries/0_stateless/00861_decimal_quoted_csv.sql b/tests/queries/0_stateless/00861_decimal_quoted_csv.sql index e716c607ef8..d7211c55df7 100644 --- a/tests/queries/0_stateless/00861_decimal_quoted_csv.sql +++ b/tests/queries/0_stateless/00861_decimal_quoted_csv.sql @@ -2,15 +2,14 @@ DROP TABLE IF EXISTS test_00861; CREATE TABLE test_00861 (key UInt64, d32 Decimal32(2), d64 Decimal64(2), d128 Decimal128(2)) ENGINE = Memory; INSERT INTO test_00861 FORMAT CSV "1","1","1","1" -; + INSERT INTO test_00861 FORMAT CSV "2","-1","-1","-1" -; + INSERT INTO test_00861 FORMAT CSV "3","1.0","1.0","1.0" -; + INSERT INTO test_00861 FORMAT CSV "4","-0.1","-0.1","-0.1" -; + INSERT INTO test_00861 FORMAT CSV "5","0.010","0.010","0.010" -; SELECT * FROM test_00861 ORDER BY key; diff --git a/tests/queries/0_stateless/00927_asof_join_correct_bt.reference b/tests/queries/0_stateless/00927_asof_join_correct_bt.reference index bb199d0159a..28c48d2e290 100644 --- a/tests/queries/0_stateless/00927_asof_join_correct_bt.reference +++ b/tests/queries/0_stateless/00927_asof_join_correct_bt.reference @@ -1,13 +1,36 @@ +-- { echoOn } +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t); 1 101 1 0 0 0 1 102 2 2 102 1 1 103 3 2 102 1 1 104 4 4 104 1 1 105 5 4 104 1 +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t); 1 101 1 0 0 0 1 102 2 2 102 1 1 103 3 2 102 1 1 104 4 4 104 1 1 105 5 4 104 1 +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t); +1 101 1 0 0 0 +1 102 2 2 102 1 +1 103 3 2 102 1 +1 104 4 4 104 1 +1 105 5 4 104 1 +SET join_algorithm = 'full_sorting_merge'; +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t); +1 101 1 0 0 0 +1 102 2 2 102 1 +1 103 3 2 102 1 +1 104 4 4 104 1 +1 105 5 4 104 1 +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t); +1 101 1 0 0 0 +1 102 2 2 102 1 +1 103 3 2 102 1 +1 104 4 4 104 1 +1 105 5 4 104 1 +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t); 1 101 1 0 0 0 1 102 2 2 102 1 1 103 3 2 102 1 diff --git a/tests/queries/0_stateless/00927_asof_join_correct_bt.sql b/tests/queries/0_stateless/00927_asof_join_correct_bt.sql index 281a81d51c0..d796b62d3b3 100644 --- a/tests/queries/0_stateless/00927_asof_join_correct_bt.sql +++ b/tests/queries/0_stateless/00927_asof_join_correct_bt.sql @@ -4,20 +4,29 @@ DROP TABLE IF EXISTS B; CREATE TABLE A(k UInt32, t UInt32, a UInt64) ENGINE = MergeTree() ORDER BY (k, t); INSERT INTO A(k,t,a) VALUES (1,101,1),(1,102,2),(1,103,3),(1,104,4),(1,105,5); -CREATE TABLE B(k UInt32, t UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t); -INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4); -SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t); -DROP TABLE B; +CREATE TABLE B1(k UInt32, t UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO B1(k,t,b) VALUES (1,102,2), (1,104,4); +CREATE TABLE B2(t UInt32, k UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO B2(k,t,b) VALUES (1,102,2), (1,104,4); -CREATE TABLE B(t UInt32, k UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t); -INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4); -SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t); -DROP TABLE B; +CREATE TABLE B3(k UInt32, b UInt64, t UInt32) ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO B3(k,t,b) VALUES (1,102,2), (1,104,4); -CREATE TABLE B(k UInt32, b UInt64, t UInt32) ENGINE = MergeTree() ORDER BY (k, t); -INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4); -SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t); -DROP TABLE B; +-- { echoOn } +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t); +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t); +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t); + +SET join_algorithm = 'full_sorting_merge'; +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t); +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t); +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t); + +-- { echoOff } + +DROP TABLE B1; +DROP TABLE B2; +DROP TABLE B3; DROP TABLE A; diff --git a/tests/queries/0_stateless/00927_asof_join_long.reference b/tests/queries/0_stateless/00927_asof_join_long.reference index d4f015c68e4..ec40d2bc463 100644 --- a/tests/queries/0_stateless/00927_asof_join_long.reference +++ b/tests/queries/0_stateless/00927_asof_join_long.reference @@ -1 +1,2 @@ 3000000 +3000000 diff --git a/tests/queries/0_stateless/00927_asof_join_long.sql b/tests/queries/0_stateless/00927_asof_join_long.sql index c03a06d48d4..7a73875e93e 100644 --- a/tests/queries/0_stateless/00927_asof_join_long.sql +++ b/tests/queries/0_stateless/00927_asof_join_long.sql @@ -2,15 +2,28 @@ DROP TABLE IF EXISTS tvs; +-- to use different algorithms for in subquery +SET allow_experimental_analyzer = 1; + CREATE TABLE tvs(k UInt32, t UInt32, tv UInt64) ENGINE = Memory; INSERT INTO tvs(k,t,tv) SELECT k, t, t FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys -CROSS JOIN (SELECT toUInt32(number * 3) as t FROM numbers(10000)) tv_times; +CROSS JOIN (SELECT toUInt32(number * 3) as t FROM numbers(10000)) tv_times +SETTINGS join_algorithm = 'hash'; SELECT SUM(trades.price - tvs.tv) FROM (SELECT k, t, t as price FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys - CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times) trades + CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times + SETTINGS join_algorithm = 'hash') trades ASOF LEFT JOIN tvs USING(k,t); +SELECT SUM(trades.price - tvs.tv) FROM +(SELECT k, t, t as price + FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys + CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times + SETTINGS join_algorithm = 'hash') trades +ASOF LEFT JOIN tvs USING(k,t) +SETTINGS join_algorithm = 'full_sorting_merge'; + DROP TABLE tvs; diff --git a/tests/queries/0_stateless/00927_asof_join_noninclusive.reference b/tests/queries/0_stateless/00927_asof_join_noninclusive.reference index fe2844a2a43..d856372fb4a 100644 --- a/tests/queries/0_stateless/00927_asof_join_noninclusive.reference +++ b/tests/queries/0_stateless/00927_asof_join_noninclusive.reference @@ -27,3 +27,32 @@ 2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2 2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2 2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2 +1 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0 +1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1 +1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1 +1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1 +1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1 +2 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0 +2 1970-01-01 00:00:02 2 0 1970-01-01 00:00:00 0 +2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2 +2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2 +2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2 +3 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0 +3 1970-01-01 00:00:02 2 0 1970-01-01 00:00:00 0 +3 1970-01-01 00:00:03 3 0 1970-01-01 00:00:00 0 +3 1970-01-01 00:00:04 4 0 1970-01-01 00:00:00 0 +3 1970-01-01 00:00:05 5 0 1970-01-01 00:00:00 0 +1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1 +1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1 +1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1 +1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1 +2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2 +2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2 +2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2 +1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1 +1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1 +1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1 +1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1 +2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2 +2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2 +2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2 diff --git a/tests/queries/0_stateless/00927_asof_join_noninclusive.sql b/tests/queries/0_stateless/00927_asof_join_noninclusive.sql index 5f15f3b593d..3cc99df4462 100644 --- a/tests/queries/0_stateless/00927_asof_join_noninclusive.sql +++ b/tests/queries/0_stateless/00927_asof_join_noninclusive.sql @@ -11,9 +11,12 @@ INSERT INTO B(k,t,b) VALUES (1,2,2),(1,4,4); INSERT INTO B(k,t,b) VALUES (2,3,3); SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t); - SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF INNER JOIN B ON A.k == B.k AND A.t >= B.t ORDER BY (A.k, A.t); +SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF JOIN B USING(k,t) ORDER BY (A.k, A.t); +SET join_algorithm = 'full_sorting_merge'; +SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t); +SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF INNER JOIN B ON A.k == B.k AND A.t >= B.t ORDER BY (A.k, A.t); SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF JOIN B USING(k,t) ORDER BY (A.k, A.t); DROP TABLE A; diff --git a/tests/queries/0_stateless/00927_asof_join_other_types.reference b/tests/queries/0_stateless/00927_asof_join_other_types.reference index 80c85ec1ae3..ddbc24ff925 100644 --- a/tests/queries/0_stateless/00927_asof_join_other_types.reference +++ b/tests/queries/0_stateless/00927_asof_join_other_types.reference @@ -1,27 +1,72 @@ +- 2 1 1 0 2 3 3 3 2 5 5 3 +- 2 1 1 0 2 3 3 3 2 5 5 3 +- 2 1 1 0 2 3 3 3 2 5 5 3 +- 2 1 1 0 2 3 3 3 2 5 5 3 +- +2 1 1 0 +2 3 3 3 +2 5 5 3 +- +2 1 1 0 +2 3 3 3 +2 5 5 3 +- +2 1 1 0 +2 3 3 3 +2 5 5 3 +- +2 1 1 0 +2 3 3 3 +2 5 5 3 +- 2 1970-01-01 02:00:01 1 0 2 1970-01-01 02:00:03 3 3 2 1970-01-01 02:00:05 5 3 +- +2 1970-01-01 02:00:01 1 0 +2 1970-01-01 02:00:03 3 3 +2 1970-01-01 02:00:05 5 3 +- 2 1 1 0 2 3 3 3 2 5 5 3 +- 2 1 1 0 2 3 3 3 2 5 5 3 +- 2 1 1 0 2 3 3 3 2 5 5 3 +- +2 1 1 0 +2 3 3 3 +2 5 5 3 +- +2 1 1 0 +2 3 3 3 +2 5 5 3 +- +2 1 1 0 +2 3 3 3 +2 5 5 3 +- +2 1970-01-01 02:00:00.001 1 0 +2 1970-01-01 02:00:00.003 3 3 +2 1970-01-01 02:00:00.005 5 3 +- 2 1970-01-01 02:00:00.001 1 0 2 1970-01-01 02:00:00.003 3 3 2 1970-01-01 02:00:00.005 5 3 diff --git a/tests/queries/0_stateless/00927_asof_join_other_types.sh b/tests/queries/0_stateless/00927_asof_join_other_types.sh deleted file mode 100755 index 10173a3e43f..00000000000 --- a/tests/queries/0_stateless/00927_asof_join_other_types.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env bash - -set -e - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -for typename in "UInt32" "UInt64" "Float64" "Float32" "DateTime('Asia/Istanbul')" "Decimal32(5)" "Decimal64(5)" "Decimal128(5)" "DateTime64(3, 'Asia/Istanbul')" -do - $CLICKHOUSE_CLIENT -mn <= B.t ORDER BY (A.a, A.t); SELECT count() FROM A ASOF LEFT JOIN B ON A.a == B.b AND B.t <= A.t; SELECT A.a, A.t, B.b, B.t FROM A ASOF INNER JOIN B ON B.t <= A.t AND A.a == B.b ORDER BY (A.a, A.t); @@ -28,5 +36,8 @@ ASOF INNER JOIN (SELECT * FROM B UNION ALL SELECT 1, 3) AS B ON B.t <= A.t AND A WHERE B.t != 3 ORDER BY (A.a, A.t) ; +{% endfor -%} +{% endfor -%} + DROP TABLE A; DROP TABLE B; diff --git a/tests/queries/0_stateless/00976_system_stop_ttl_merges.sql b/tests/queries/0_stateless/00976_system_stop_ttl_merges.sql index b27e4275d5d..2ab85d90a66 100644 --- a/tests/queries/0_stateless/00976_system_stop_ttl_merges.sql +++ b/tests/queries/0_stateless/00976_system_stop_ttl_merges.sql @@ -4,7 +4,7 @@ create table ttl (d Date, a Int) engine = MergeTree order by a partition by toDa system stop ttl merges ttl; -insert into ttl values (toDateTime('2000-10-10 00:00:00'), 1), (toDateTime('2000-10-10 00:00:00'), 2) +insert into ttl values (toDateTime('2000-10-10 00:00:00'), 1), (toDateTime('2000-10-10 00:00:00'), 2); insert into ttl values (toDateTime('2100-10-10 00:00:00'), 3), (toDateTime('2100-10-10 00:00:00'), 4); select sleep(1) format Null; -- wait if very fast merge happen diff --git a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh index 4887c409844..02a739ece4a 100755 --- a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh +++ b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh @@ -41,7 +41,7 @@ function thread3() function thread4() { - while true; do $CLICKHOUSE_CLIENT --receive_timeout=3 -q "OPTIMIZE TABLE alter_table0 FINAL" | grep -Fv "Timeout exceeded while receiving data from server"; done + while true; do $CLICKHOUSE_CLIENT --receive_timeout=1 -q "OPTIMIZE TABLE alter_table0 FINAL" | grep -Fv "Timeout exceeded while receiving data from server"; done } function thread5() diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql b/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql index 4b9b15bee8f..4c4bcc440ca 100644 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql @@ -139,6 +139,6 @@ SELECT {CLICKHOUSE_DATABASE:String} || '.dict3' as n, dictGet(n, 'some_column', DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.table_for_dict; -SYSTEM RELOAD DICTIONARIES; -- {serverError UNKNOWN_TABLE} +SYSTEM RELOAD DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict3; -- {serverError UNKNOWN_TABLE} SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dict3', 'some_column', toUInt64(12)); diff --git a/tests/queries/0_stateless/01034_JSONCompactEachRow.sql b/tests/queries/0_stateless/01034_JSONCompactEachRow.sql index f71597a60e5..e47e5346c01 100644 --- a/tests/queries/0_stateless/01034_JSONCompactEachRow.sql +++ b/tests/queries/0_stateless/01034_JSONCompactEachRow.sql @@ -22,51 +22,64 @@ SELECT 5; /* Check JSONCompactEachRow Input */ CREATE TABLE test_table (v1 String, v2 UInt8, v3 DEFAULT v2 * 16, v4 UInt8 DEFAULT 8) ENGINE = MergeTree() ORDER BY v2; INSERT INTO test_table FORMAT JSONCompactEachRow ["first", 1, "2", null] ["second", 2, null, 6]; + SELECT * FROM test_table FORMAT JSONCompactEachRow; TRUNCATE TABLE test_table; SELECT 6; /* Check input_format_null_as_default = 1 */ SET input_format_null_as_default = 1; INSERT INTO test_table FORMAT JSONCompactEachRow ["first", 1, "2", null] ["second", 2, null, 6]; + SELECT * FROM test_table FORMAT JSONCompactEachRow; TRUNCATE TABLE test_table; SELECT 7; /* Check Nested */ CREATE TABLE test_table_2 (v1 UInt8, n Nested(id UInt8, name String)) ENGINE = MergeTree() ORDER BY v1; INSERT INTO test_table_2 FORMAT JSONCompactEachRow [16, [15, 16, null], ["first", "second", "third"]]; + SELECT * FROM test_table_2 FORMAT JSONCompactEachRow; TRUNCATE TABLE test_table_2; SELECT 8; /* Check JSONCompactEachRowWithNamesAndTypes and JSONCompactEachRowWithNamesAndTypes Input */ SET input_format_null_as_default = 0; INSERT INTO test_table FORMAT JSONCompactEachRowWithNamesAndTypes ["v1", "v2", "v3", "v4"]["String","UInt8","UInt16","UInt8"]["first", 1, "2", null]["second", 2, null, 6]; + INSERT INTO test_table FORMAT JSONCompactEachRowWithNames ["v1", "v2", "v3", "v4"]["first", 1, "2", null]["second", 2, null, 6]; + SELECT * FROM test_table FORMAT JSONCompactEachRow; TRUNCATE TABLE test_table; SELECT 9; /* Check input_format_null_as_default = 1 */ SET input_format_null_as_default = 1; INSERT INTO test_table FORMAT JSONCompactEachRowWithNamesAndTypes ["v1", "v2", "v3", "v4"]["String","UInt8","UInt16","UInt8"]["first", 1, "2", null] ["second", 2, null, 6]; + INSERT INTO test_table FORMAT JSONCompactEachRowWithNames ["v1", "v2", "v3", "v4"]["first", 1, "2", null] ["second", 2, null, 6]; + SELECT * FROM test_table FORMAT JSONCompactEachRow; SELECT 10; /* Check Header */ TRUNCATE TABLE test_table; SET input_format_skip_unknown_fields = 1; INSERT INTO test_table FORMAT JSONCompactEachRowWithNamesAndTypes ["v1", "v2", "invalid_column"]["String", "UInt8", "UInt8"]["first", 1, 32]["second", 2, "64"]; + INSERT INTO test_table FORMAT JSONCompactEachRowWithNames ["v1", "v2", "invalid_column"]["first", 1, 32]["second", 2, "64"]; + SELECT * FROM test_table FORMAT JSONCompactEachRow; SELECT 11; TRUNCATE TABLE test_table; INSERT INTO test_table FORMAT JSONCompactEachRowWithNamesAndTypes ["v4", "v2", "v3"]["UInt8", "UInt8", "UInt16"][1, 2, 3] + INSERT INTO test_table FORMAT JSONCompactEachRowWithNames ["v4", "v2", "v3"][1, 2, 3] + SELECT * FROM test_table FORMAT JSONCompactEachRowWithNamesAndTypes; SELECT '----------'; SELECT * FROM test_table FORMAT JSONCompactEachRowWithNames; SELECT 12; /* Check Nested */ INSERT INTO test_table_2 FORMAT JSONCompactEachRowWithNamesAndTypes ["v1", "n.id", "n.name"]["UInt8", "Array(UInt8)", "Array(String)"][16, [15, 16, null], ["first", "second", "third"]]; + INSERT INTO test_table_2 FORMAT JSONCompactEachRowWithNames ["v1", "n.id", "n.name"][16, [15, 16, null], ["first", "second", "third"]]; + SELECT * FROM test_table_2 FORMAT JSONCompactEachRowWithNamesAndTypes; SELECT '----------'; SELECT * FROM test_table_2 FORMAT JSONCompactEachRowWithNames; diff --git a/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql b/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql index d0841124706..1334780a5af 100644 --- a/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql +++ b/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql @@ -1,3 +1,6 @@ +-- Tags: no-parallel +-- Does not allow if other tests do SYSTEM RELOAD DICTIONARIES at the same time. + CREATE TABLE dict_data (key UInt64, val UInt64) Engine=Memory(); CREATE DICTIONARY dict ( diff --git a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.reference b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.reference index f12dcd8258a..10bc7981d3e 100644 --- a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.reference +++ b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.reference @@ -1,6 +1,6 @@ 12 -> 102 13 -> 103 14 -> -1 -12(r) -> 102 -13(r) -> 103 -14(r) -> 104 +12 (after reloading) -> 102 +13 (after reloading) -> 103 +14 (after reloading) -> 104 diff --git a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh index 2b075566ac3..42488ca946c 100755 --- a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh +++ b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh @@ -1,4 +1,6 @@ #!/usr/bin/env bash +# Tags: no-random-settings +# Dictionaries are updated using the server time. CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -6,8 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -o pipefail -# NOTE: dictionaries TTLs works with server timezone, so session_timeout cannot be used -$CLICKHOUSE_CLIENT --session_timezone '' --multiquery < ', dictGetInt64('${CLICKHOUSE_DATABASE $CLICKHOUSE_CLIENT --query "INSERT INTO ${CLICKHOUSE_DATABASE}.table VALUES (13, 103, now())" $CLICKHOUSE_CLIENT --query "INSERT INTO ${CLICKHOUSE_DATABASE}.table VALUES (14, 104, now() - INTERVAL 1 DAY)" +# Wait when the dictionary will update the value for 13 on its own: while [ "$(${CLICKHOUSE_CLIENT} --query "SELECT dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(13))")" = -1 ] - do - sleep 0.5 - done +do + sleep 0.5 +done $CLICKHOUSE_CLIENT --query "SELECT '13 -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(13))" + +# By the way, the value for 14 is expected to not be updated at this moment, +# because the values were selected by the update field insert_time, and for 14 it was set as one day ago. $CLICKHOUSE_CLIENT --query "SELECT '14 -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(14))" +# SYSTEM RELOAD DICTIONARY reloads it completely, regardless of the update field, so we will see new values, even for key 14. $CLICKHOUSE_CLIENT --query "SYSTEM RELOAD DICTIONARY '${CLICKHOUSE_DATABASE}.dict'" -$CLICKHOUSE_CLIENT --query "SELECT '12(r) -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(12))" -$CLICKHOUSE_CLIENT --query "SELECT '13(r) -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(13))" -$CLICKHOUSE_CLIENT --query "SELECT '14(r) -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(14))" +$CLICKHOUSE_CLIENT --query "SELECT '12 (after reloading) -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(12))" +$CLICKHOUSE_CLIENT --query "SELECT '13 (after reloading) -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(13))" +$CLICKHOUSE_CLIENT --query "SELECT '14 (after reloading) -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(14))" diff --git a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh index 4325ebeed24..5c70806ea7b 100755 --- a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh +++ b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-random-settings, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh index 8e28995980f..32c9c52ab09 100755 --- a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh +++ b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-random-settings, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sh b/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sh index ee11b265ecd..ba566bb4ae6 100755 --- a/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sh +++ b/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-random-settings, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01055_window_view_proc_hop_to.sh b/tests/queries/0_stateless/01055_window_view_proc_hop_to.sh index ea8ad372617..0db4173b3dc 100755 --- a/tests/queries/0_stateless/01055_window_view_proc_hop_to.sh +++ b/tests/queries/0_stateless/01055_window_view_proc_hop_to.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-random-settings, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01060_defaults_all_columns.sql b/tests/queries/0_stateless/01060_defaults_all_columns.sql index afbb01b8cb2..74fad7f75fb 100644 --- a/tests/queries/0_stateless/01060_defaults_all_columns.sql +++ b/tests/queries/0_stateless/01060_defaults_all_columns.sql @@ -3,6 +3,7 @@ DROP TABLE IF EXISTS defaults_all_columns; CREATE TABLE defaults_all_columns (n UInt8 DEFAULT 42, s String DEFAULT concat('test', CAST(n, 'String'))) ENGINE = Memory; INSERT INTO defaults_all_columns FORMAT JSONEachRow {"n": 1, "s": "hello"} {}; + INSERT INTO defaults_all_columns FORMAT JSONEachRow {"n": 2}, {"s": "world"}; SELECT * FROM defaults_all_columns ORDER BY n, s; diff --git a/tests/queries/0_stateless/01072_json_each_row_data_in_square_brackets.sql b/tests/queries/0_stateless/01072_json_each_row_data_in_square_brackets.sql index f7ccc309c5a..ae5e86ec387 100644 --- a/tests/queries/0_stateless/01072_json_each_row_data_in_square_brackets.sql +++ b/tests/queries/0_stateless/01072_json_each_row_data_in_square_brackets.sql @@ -2,9 +2,13 @@ DROP TABLE IF EXISTS json_square_brackets; CREATE TABLE json_square_brackets (id UInt32, name String) ENGINE = Memory; + INSERT INTO json_square_brackets FORMAT JSONEachRow [{"id": 1, "name": "name1"}, {"id": 2, "name": "name2"}]; + INSERT INTO json_square_brackets FORMAT JSONEachRow[]; + INSERT INTO json_square_brackets FORMAT JSONEachRow [ ] ; + INSERT INTO json_square_brackets FORMAT JSONEachRow ; SELECT * FROM json_square_brackets ORDER BY id; diff --git a/tests/queries/0_stateless/01075_window_view_proc_tumble_to_now_populate.sh b/tests/queries/0_stateless/01075_window_view_proc_tumble_to_now_populate.sh index f7842af4dad..67c249a9d0e 100755 --- a/tests/queries/0_stateless/01075_window_view_proc_tumble_to_now_populate.sh +++ b/tests/queries/0_stateless/01075_window_view_proc_tumble_to_now_populate.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-random-settings, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01076_cache_dictionary_datarace_exception_ptr.sh b/tests/queries/0_stateless/01076_cache_dictionary_datarace_exception_ptr.sh index dcd15718416..e003d2a26da 100755 --- a/tests/queries/0_stateless/01076_cache_dictionary_datarace_exception_ptr.sh +++ b/tests/queries/0_stateless/01076_cache_dictionary_datarace_exception_ptr.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race +# Tags: race, no-parallel # This is a monkey test used to trigger sanitizers. diff --git a/tests/queries/0_stateless/01116_asof_join_dolbyzerr.reference b/tests/queries/0_stateless/01116_asof_join_dolbyzerr.reference index 1055a67ea5b..0aa1a85f19d 100644 --- a/tests/queries/0_stateless/01116_asof_join_dolbyzerr.reference +++ b/tests/queries/0_stateless/01116_asof_join_dolbyzerr.reference @@ -1,3 +1,6 @@ v1 o1 ['s2','s1'] v1 o2 ['s4'] v2 o3 ['s5','s3'] +v1 o1 ['s2','s1'] +v1 o2 ['s4'] +v2 o3 ['s5','s3'] diff --git a/tests/queries/0_stateless/01116_asof_join_dolbyzerr.sql b/tests/queries/0_stateless/01116_asof_join_dolbyzerr.sql index 8a94b6ddd24..652cb35cf2a 100644 --- a/tests/queries/0_stateless/01116_asof_join_dolbyzerr.sql +++ b/tests/queries/0_stateless/01116_asof_join_dolbyzerr.sql @@ -16,3 +16,17 @@ GROUP BY ORDER BY visitorId ASC, orderId ASC; + +SELECT + visitorId, + orderId, + groupUniqArray(sessionId) +FROM sessions +ASOF INNER JOIN orders ON (sessions.visitorId = orders.visitorId) AND (sessions.date <= orders.date) +GROUP BY + visitorId, + orderId +ORDER BY + visitorId ASC, + orderId ASC +SETTINGS join_algorithm = 'full_sorting_merge'; diff --git a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.reference b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.reference index d8bb9e310e6..e69de29bb2d 100644 --- a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.reference +++ b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.reference @@ -1,4 +0,0 @@ -275 0 138 136 0 -275 0 -275 0 138 136 0 -275 0 diff --git a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh index 2ab7f883367..3b9bb50517d 100755 --- a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh +++ b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-ordinary-database, no-debug +# Tags: long, no-parallel, no-ordinary-database # Test is too heavy, avoid parallel run in Flaky Check # shellcheck disable=SC2119 @@ -7,82 +7,126 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -set -e +set -ue $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS src"; $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS dst"; $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS mv"; $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS tmp"; -$CLICKHOUSE_CLIENT --query "CREATE TABLE src (n Int8, m Int8, CONSTRAINT c CHECK xxHash32(n+m) % 8 != 0) ENGINE=MergeTree ORDER BY n PARTITION BY 0 < n SETTINGS old_parts_lifetime=0"; -$CLICKHOUSE_CLIENT --query "CREATE TABLE dst (nm Int16, CONSTRAINT c CHECK xxHash32(nm) % 8 != 0) ENGINE=MergeTree ORDER BY nm SETTINGS old_parts_lifetime=0"; -$CLICKHOUSE_CLIENT --query "CREATE MATERIALIZED VIEW mv TO dst (nm Int16) AS SELECT n*m AS nm FROM src"; -$CLICKHOUSE_CLIENT --query "CREATE TABLE tmp (x UInt8, nm Int16) ENGINE=MergeTree ORDER BY (x, nm) SETTINGS old_parts_lifetime=0" +$CLICKHOUSE_CLIENT --query "CREATE TABLE src (n Int32, m Int32, CONSTRAINT c CHECK xxHash32(n+m) % 8 != 0) ENGINE=MergeTree ORDER BY n PARTITION BY 0 < n SETTINGS old_parts_lifetime=0"; +$CLICKHOUSE_CLIENT --query "CREATE TABLE dst (nm Int32, CONSTRAINT c CHECK xxHash32(nm) % 8 != 0) ENGINE=MergeTree ORDER BY nm SETTINGS old_parts_lifetime=0"; +$CLICKHOUSE_CLIENT --query "CREATE MATERIALIZED VIEW mv TO dst (nm Int32) AS SELECT n*m AS nm FROM src"; + +$CLICKHOUSE_CLIENT --query "CREATE TABLE tmp (x UInt32, nm Int32) ENGINE=MergeTree ORDER BY (x, nm) SETTINGS old_parts_lifetime=0" $CLICKHOUSE_CLIENT --query "INSERT INTO src VALUES (0, 0)" -# some transactions will fail due to constraint -function thread_insert_commit() +function get_now() { - set -e - for i in {1..100}; do - $CLICKHOUSE_CLIENT --multiquery --query " - BEGIN TRANSACTION; - INSERT INTO src VALUES /* ($i, $1) */ ($i, $1); - SELECT throwIf((SELECT sum(nm) FROM mv) != $(($i * $1))) FORMAT Null; - INSERT INTO src VALUES /* (-$i, $1) */ (-$i, $1); - COMMIT;" 2>&1| grep -Fv "is violated at row" | grep -Fv "Transaction is not in RUNNING state" | grep -F "Received from " ||: - done + date +%s } -function thread_insert_rollback() +is_pid_exist() +{ + local pid=$1 + ps -p $pid > /dev/null +} + +function run_until_deadline_and_at_least_times() { set -e - for _ in {1..100}; do - $CLICKHOUSE_CLIENT --multiquery --query " - BEGIN TRANSACTION; - INSERT INTO src VALUES /* (42, $1) */ (42, $1); - SELECT throwIf((SELECT count() FROM src WHERE n=42 AND m=$1) != 1) FORMAT Null; - ROLLBACK;" + + local deadline=$1; shift + local min_iterations=$1; shift + local function_to_run=$1; shift + + local started_time + started_time=$(get_now) + local i=0 + + while true + do + $function_to_run $i "$@" + + [[ $(get_now) -lt $deadline ]] || break + + i=$(($i + 1)) done + + [[ $i -gt $min_iterations ]] || echo "$i/$min_iterations : not enough iterations of $function_to_run has been made from $started_time until $deadline" >&2 +} + +function insert_commit_action() +{ + set -e + + local i=$1; shift + local tag=$1; shift + + # some transactions will fail due to constraint + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + INSERT INTO src VALUES /* ($i, $tag) */ ($i, $tag); + SELECT throwIf((SELECT sum(nm) FROM mv) != $(($i * $tag))) /* ($i, $tag) */ FORMAT Null; + INSERT INTO src VALUES /* (-$i, $tag) */ (-$i, $tag); + COMMIT; + " 2>&1 \ + | grep -Fv "is violated at row" | grep -Fv "Transaction is not in RUNNING state" | grep -F "Received from " ||: +} + + +function insert_rollback_action() +{ + set -e + + local i=$1; shift + local tag=$1; shift + + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + INSERT INTO src VALUES /* (42, $tag) */ (42, $tag); + SELECT throwIf((SELECT count() FROM src WHERE n=42 AND m=$tag) != 1) FORMAT Null; + ROLLBACK;" } # make merges more aggressive -function thread_optimize() +function optimize_action() { set -e - while true; do - optimize_query="OPTIMIZE TABLE src" - partition_id=$(( RANDOM % 2 )) - if (( RANDOM % 2 )); then - optimize_query="OPTIMIZE TABLE dst" - partition_id="all" - fi - if (( RANDOM % 2 )); then - optimize_query="$optimize_query PARTITION ID '$partition_id'" - fi - if (( RANDOM % 2 )); then - optimize_query="$optimize_query FINAL" - fi - action="COMMIT" - if (( RANDOM % 4 )); then - action="ROLLBACK" - fi - $CLICKHOUSE_CLIENT --multiquery --query " + optimize_query="OPTIMIZE TABLE src" + partition_id=$(( RANDOM % 2 )) + if (( RANDOM % 2 )); then + optimize_query="OPTIMIZE TABLE dst" + partition_id="all" + fi + if (( RANDOM % 2 )); then + optimize_query="$optimize_query PARTITION ID '$partition_id'" + fi + if (( RANDOM % 2 )); then + optimize_query="$optimize_query FINAL" + fi + action="COMMIT" + if (( RANDOM % 4 )); then + action="ROLLBACK" + fi + + $CLICKHOUSE_CLIENT --multiquery --query " BEGIN TRANSACTION; - $optimize_query; + $optimize_query; $action; - " 2>&1| grep -Fv "already exists, but it will be deleted soon" | grep -F "Received from " ||: - sleep 0.$RANDOM; - done + " 2>&1 \ + | grep -Fv "already exists, but it will be deleted soon" | grep -F "Received from " ||: + + sleep 0.$RANDOM; } -function thread_select() +function select_action() { set -e - while true; do - $CLICKHOUSE_CLIENT --multiquery --query " + + $CLICKHOUSE_CLIENT --multiquery --query " BEGIN TRANSACTION; SELECT throwIf((SELECT (sum(n), count() % 2) FROM src) != (0, 1)) FORMAT Null; SELECT throwIf((SELECT (sum(nm), count() % 2) FROM mv) != (0, 1)) FORMAT Null; @@ -90,14 +134,13 @@ function thread_select() SELECT throwIf((SELECT arraySort(groupArray(nm)) FROM mv) != (SELECT arraySort(groupArray(nm)) FROM dst)) FORMAT Null; SELECT throwIf((SELECT arraySort(groupArray(nm)) FROM mv) != (SELECT arraySort(groupArray(n*m)) FROM src)) FORMAT Null; COMMIT;" - done } -function thread_select_insert() +function select_insert_action() { set -e - while true; do - $CLICKHOUSE_CLIENT --multiquery --query " + + $CLICKHOUSE_CLIENT --multiquery --query " BEGIN TRANSACTION; SELECT throwIf((SELECT count() FROM tmp) != 0) FORMAT Null; INSERT INTO tmp SELECT 1, n*m FROM src; @@ -110,36 +153,69 @@ function thread_select_insert() SELECT throwIf(1 != (SELECT countDistinct(arr) FROM (SELECT x, arraySort(groupArray(nm)) AS arr FROM tmp WHERE x!=4 GROUP BY x))) FORMAT Null; SELECT throwIf((SELECT count(), sum(nm) FROM tmp WHERE x=4) != (SELECT count(), sum(nm) FROM tmp WHERE x!=4)) FORMAT Null; ROLLBACK;" - done } -thread_insert_commit 1 & PID_1=$! -thread_insert_commit 2 & PID_2=$! -thread_insert_rollback 3 & PID_3=$! +MAIN_TIME_PART=400 +SECOND_TIME_PART=30 +WAIT_FINISH=60 +LAST_TIME_GAP=10 -thread_optimize & PID_4=$! -thread_select & PID_5=$! -thread_select_insert & PID_6=$! -sleep 0.$RANDOM; -thread_select & PID_7=$! -thread_select_insert & PID_8=$! +if [[ $((MAIN_TIME_PART + SECOND_TIME_PART + WAIT_FINISH + LAST_TIME_GAP)) -ge 600 ]]; then + echo "time sttings are wrong" 2>&1 + exit 1 +fi -wait $PID_1 && wait $PID_2 && wait $PID_3 -kill -TERM $PID_4 -kill -TERM $PID_5 -kill -TERM $PID_6 -kill -TERM $PID_7 -kill -TERM $PID_8 -wait -wait_for_queries_to_finish 40 +START_TIME=$(get_now) +STOP_TIME=$((START_TIME + MAIN_TIME_PART)) +SECOND_STOP_TIME=$((STOP_TIME + SECOND_TIME_PART)) +MIN_ITERATIONS=20 + +run_until_deadline_and_at_least_times $STOP_TIME $MIN_ITERATIONS insert_commit_action 1 & PID_1=$! +run_until_deadline_and_at_least_times $STOP_TIME $MIN_ITERATIONS insert_commit_action 2 & PID_2=$! +run_until_deadline_and_at_least_times $STOP_TIME $MIN_ITERATIONS insert_rollback_action 3 & PID_3=$! + +run_until_deadline_and_at_least_times $SECOND_STOP_TIME $MIN_ITERATIONS optimize_action & PID_4=$! +run_until_deadline_and_at_least_times $SECOND_STOP_TIME $MIN_ITERATIONS select_action & PID_5=$! +run_until_deadline_and_at_least_times $SECOND_STOP_TIME $MIN_ITERATIONS select_insert_action & PID_6=$! +sleep 0.$RANDOM +run_until_deadline_and_at_least_times $SECOND_STOP_TIME $MIN_ITERATIONS select_action & PID_7=$! +run_until_deadline_and_at_least_times $SECOND_STOP_TIME $MIN_ITERATIONS select_insert_action & PID_8=$! + +wait $PID_1 || echo "insert_commit_action has failed with status $?" 2>&1 +wait $PID_2 || echo "second insert_commit_action has failed with status $?" 2>&1 +wait $PID_3 || echo "insert_rollback_action has failed with status $?" 2>&1 + +is_pid_exist $PID_4 || echo "optimize_action is not running" 2>&1 +is_pid_exist $PID_5 || echo "select_action is not running" 2>&1 +is_pid_exist $PID_6 || echo "select_insert_action is not running" 2>&1 +is_pid_exist $PID_7 || echo "second select_action is not running" 2>&1 +is_pid_exist $PID_8 || echo "second select_insert_action is not running" 2>&1 + +wait $PID_4 || echo "optimize_action has failed with status $?" 2>&1 +wait $PID_5 || echo "select_action has failed with status $?" 2>&1 +wait $PID_6 || echo "select_insert_action has failed with status $?" 2>&1 +wait $PID_7 || echo "second select_action has failed with status $?" 2>&1 +wait $PID_8 || echo "second select_insert_action has failed with status $?" 2>&1 + +wait_for_queries_to_finish $WAIT_FINISH $CLICKHOUSE_CLIENT --multiquery --query " -BEGIN TRANSACTION; -SELECT count(), sum(n), sum(m=1), sum(m=2), sum(m=3) FROM src; -SELECT count(), sum(nm) FROM mv"; + BEGIN TRANSACTION; + SELECT throwIf((SELECT (sum(n), count() % 2) FROM src) != (0, 1)) FORMAT Null; + SELECT throwIf((SELECT (sum(nm), count() % 2) FROM mv) != (0, 1)) FORMAT Null; + SELECT throwIf((SELECT (sum(nm), count() % 2) FROM dst) != (0, 1)) FORMAT Null; + SELECT throwIf((SELECT arraySort(groupArray(nm)) FROM mv) != (SELECT arraySort(groupArray(nm)) FROM dst)) FORMAT Null; + SELECT throwIf((SELECT arraySort(groupArray(nm)) FROM mv) != (SELECT arraySort(groupArray(n*m)) FROM src)) FORMAT Null; + COMMIT; +" -$CLICKHOUSE_CLIENT --query "SELECT count(), sum(n), sum(m=1), sum(m=2), sum(m=3) FROM src" -$CLICKHOUSE_CLIENT --query "SELECT count(), sum(nm) FROM mv" +$CLICKHOUSE_CLIENT --multiquery --query " + SELECT throwIf((SELECT (sum(n), count() % 2) FROM src) != (0, 1)) FORMAT Null; + SELECT throwIf((SELECT (sum(nm), count() % 2) FROM mv) != (0, 1)) FORMAT Null; + SELECT throwIf((SELECT (sum(nm), count() % 2) FROM dst) != (0, 1)) FORMAT Null; + SELECT throwIf((SELECT arraySort(groupArray(nm)) FROM mv) != (SELECT arraySort(groupArray(nm)) FROM dst)) FORMAT Null; + SELECT throwIf((SELECT arraySort(groupArray(nm)) FROM mv) != (SELECT arraySort(groupArray(n*m)) FROM src)) FORMAT Null; +" $CLICKHOUSE_CLIENT --query "DROP TABLE src"; $CLICKHOUSE_CLIENT --query "DROP TABLE dst"; diff --git a/tests/queries/0_stateless/01179_insert_values_semicolon.expect b/tests/queries/0_stateless/01179_insert_values_semicolon.expect index 534b18a9500..4b8693126a1 100755 --- a/tests/queries/0_stateless/01179_insert_values_semicolon.expect +++ b/tests/queries/0_stateless/01179_insert_values_semicolon.expect @@ -37,10 +37,10 @@ send -- "INSERT INTO test_01179 values ('foo'); \r" expect "Ok." send -- "INSERT INTO test_01179 values ('foo'); ('bar') \r" -expect "Cannot read data after semicolon" +expect "Syntax error" send -- "SELECT val, count() FROM test_01179 GROUP BY val FORMAT TSV\r" -expect "foo\t2" +expect "foo\t3" send -- "DROP TABLE test_01179\r" expect "Ok." diff --git a/tests/queries/0_stateless/01187_set_profile_as_setting.sh b/tests/queries/0_stateless/01187_set_profile_as_setting.sh index dacb609d790..42f596c45d6 100755 --- a/tests/queries/0_stateless/01187_set_profile_as_setting.sh +++ b/tests/queries/0_stateless/01187_set_profile_as_setting.sh @@ -1,9 +1,9 @@ #!/usr/bin/env bash # Tags: no-random-settings -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment, because the test has to use the readonly mode +CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01194_http_query_id.sh b/tests/queries/0_stateless/01194_http_query_id.sh index 5aebdc10dfc..42321112185 100755 --- a/tests/queries/0_stateless/01194_http_query_id.sh +++ b/tests/queries/0_stateless/01194_http_query_id.sh @@ -4,14 +4,22 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -url="http://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/?session_id=test_01194" -rnd=$RANDOM +rnd="$CLICKHOUSE_DATABASE" +url="${CLICKHOUSE_URL}&session_id=test_01194_${CLICKHOUSE_DATABASE}" ${CLICKHOUSE_CURL} -sS "$url&query=SELECT+'test_01194',$rnd,1" > /dev/null ${CLICKHOUSE_CURL} -sS "$url&query=SELECT+'test_01194',$rnd,2" > /dev/null ${CLICKHOUSE_CURL} -sS "$url" --data "SELECT 'test_01194',$rnd,3" > /dev/null ${CLICKHOUSE_CURL} -sS "$url" --data "SELECT 'test_01194',$rnd,4" > /dev/null -${CLICKHOUSE_CURL} -sS "$url" --data "SYSTEM FLUSH LOGS" +$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" -${CLICKHOUSE_CURL} -sS "$url&query=SELECT+count(DISTINCT+query_id)+FROM+system.query_log+WHERE+current_database+LIKE+currentDatabase()+AND+query+LIKE+'SELECT+''test_01194'',$rnd%25'" +$CLICKHOUSE_CLIENT -q " + SELECT + count(DISTINCT query_id) + FROM system.query_log + WHERE + current_database = currentDatabase() + AND event_date >= yesterday() + AND query LIKE 'SELECT ''test_01194'',$rnd%' + AND query_id != queryID()" diff --git a/tests/queries/0_stateless/01231_operator_null_in.sql b/tests/queries/0_stateless/01231_operator_null_in.sql index 0424a995b3f..26f342540e3 100644 --- a/tests/queries/0_stateless/01231_operator_null_in.sql +++ b/tests/queries/0_stateless/01231_operator_null_in.sql @@ -112,7 +112,7 @@ DROP TABLE IF EXISTS null_in_subquery; DROP TABLE IF EXISTS null_in_tuple; CREATE TABLE null_in_tuple (dt DateTime, idx int, t Tuple(Nullable(UInt64), Nullable(String))) ENGINE = MergeTree() PARTITION BY dt ORDER BY idx SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -INSERT INTO null_in_tuple VALUES (1, 1, (1, '1')) (2, 2, (2, NULL)) (3, 3, (NULL, '3')) (4, 4, (NULL, NULL)) +INSERT INTO null_in_tuple VALUES (1, 1, (1, '1')) (2, 2, (2, NULL)) (3, 3, (NULL, '3')) (4, 4, (NULL, NULL)); SET transform_null_in = 0; diff --git a/tests/queries/0_stateless/01238_http_memory_tracking.sh b/tests/queries/0_stateless/01238_http_memory_tracking.sh index 26d3dd8acd4..ce1310cf302 100755 --- a/tests/queries/0_stateless/01238_http_memory_tracking.sh +++ b/tests/queries/0_stateless/01238_http_memory_tracking.sh @@ -8,18 +8,22 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -o pipefail +MISTER_USER="test_01238_http_$RANDOM" +# Create another user to check limit on the amount of memory reliabily +${CLICKHOUSE_CLIENT} --format Null -q "CREATE USER $MISTER_USER" + # This is needed to keep at least one running query for user for the time of test. # (1k http queries takes ~1 second, let's run for 5x more to avoid flaps) -${CLICKHOUSE_CLIENT} --function_sleep_max_microseconds_per_block 5000000 --format Null -n <<<'SELECT sleepEachRow(1) FROM numbers(5)' & +${CLICKHOUSE_CLIENT} --user ${MISTER_USER} --function_sleep_max_microseconds_per_block 5000000 --format Null -n <<<'SELECT sleepEachRow(1) FROM numbers(5)' & # ignore "yes: standard output: Broken pipe" yes 'SELECT 1' 2>/dev/null | { head -n1000 } | { - xargs -I{} ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&max_memory_usage_for_user=$((1<<30))" -d '{}' + xargs -I{} ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${MISTER_USER}&wait_end_of_query=1&max_memory_usage_for_user=$((1<<30))" -d '{}' } | grep -x -c 1 wait # Reset max_memory_usage_for_user, so it will not affect other tests -${CLICKHOUSE_CLIENT} --max_memory_usage_for_user=0 -q "SELECT 1 FORMAT Null" +${CLICKHOUSE_CLIENT} --user ${MISTER_USER} --max_memory_usage_for_user=0 -q "SELECT 1 FORMAT Null" diff --git a/tests/queries/0_stateless/01246_buffer_flush.sh b/tests/queries/0_stateless/01246_buffer_flush.sh index 1ca953c80d9..aea91a0bf6b 100755 --- a/tests/queries/0_stateless/01246_buffer_flush.sh +++ b/tests/queries/0_stateless/01246_buffer_flush.sh @@ -5,72 +5,90 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -function elapsed_sec() +set -e + +function query() +{ + local query_id + if [[ $1 == --query_id ]]; then + query_id="&query_id=$2" + shift 2 + fi + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}$query_id" -d "$*" +} + +function wait_until() { local expr=$1 && shift - local start end - start=$(date +%s.%N) while ! eval "$expr"; do sleep 0.5 done - end=$(date +%s.%N) - $CLICKHOUSE_LOCAL -q "select floor($end-$start)" +} +function get_buffer_delay() +{ + local buffer_insert_id=$1 && shift + query "SYSTEM FLUSH LOGS" + query " + WITH + (SELECT event_time_microseconds FROM system.query_log WHERE current_database = '$CLICKHOUSE_DATABASE' AND type = 'QueryStart' AND query_id = '$buffer_insert_id') AS begin_, + (SELECT max(event_time) FROM data_01256) AS end_ + SELECT dateDiff('seconds', begin_, end_)::UInt64 + " } -$CLICKHOUSE_CLIENT -nm -q " - drop table if exists data_01256; - drop table if exists buffer_01256; - - create table data_01256 as system.numbers Engine=Memory(); -" +query "drop table if exists data_01256" +query "drop table if exists buffer_01256" +query "create table data_01256 (key UInt64, event_time DateTime(6) MATERIALIZED now64(6)) Engine=Memory()" echo "min" -$CLICKHOUSE_CLIENT -nm -q " - create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, +query " + create table buffer_01256 (key UInt64) Engine=Buffer($CLICKHOUSE_DATABASE, data_01256, 1, 2, 100, /* time */ 4, 100, /* rows */ 1, 1e6 /* bytes */ - ); - insert into buffer_01256 select * from system.numbers limit 5; - select count() from data_01256; + ) " -sec=$(elapsed_sec '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01256") -eq 5 ]]') +min_query_id=$(random_str 10) +query --query_id "$min_query_id" "insert into buffer_01256 select * from system.numbers limit 5" +query "select count() from data_01256" +wait_until '[[ $(query "select count() from data_01256") -eq 5 ]]' +sec=$(get_buffer_delay "$min_query_id") [[ $sec -ge 2 ]] || echo "Buffer flushed too early, min_time=2, flushed after $sec sec" [[ $sec -lt 100 ]] || echo "Buffer flushed too late, max_time=100, flushed after $sec sec" -$CLICKHOUSE_CLIENT -q "select count() from data_01256" -$CLICKHOUSE_CLIENT -q "drop table buffer_01256" +query "select count() from data_01256" +query "drop table buffer_01256" echo "max" -$CLICKHOUSE_CLIENT -nm -q " - create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, +query " + create table buffer_01256 (key UInt64) Engine=Buffer($CLICKHOUSE_DATABASE, data_01256, 1, 100, 2, /* time */ 0, 100, /* rows */ 0, 1e6 /* bytes */ - ); - insert into buffer_01256 select * from system.numbers limit 5; - select count() from data_01256; + ) " -sec=$(elapsed_sec '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01256") -eq 10 ]]') +max_query_id=$(random_str 10) +query --query_id "$max_query_id" "insert into buffer_01256 select * from system.numbers limit 5" +query "select count() from data_01256" +wait_until '[[ $(query "select count() from data_01256") -eq 10 ]]' +sec=$(get_buffer_delay "$max_query_id") [[ $sec -ge 2 ]] || echo "Buffer flushed too early, max_time=2, flushed after $sec sec" -$CLICKHOUSE_CLIENT -q "select count() from data_01256" -$CLICKHOUSE_CLIENT -q "drop table buffer_01256" +query "select count() from data_01256" +query "drop table buffer_01256" echo "direct" -$CLICKHOUSE_CLIENT -nm -q " - create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, +query " + create table buffer_01256 (key UInt64) Engine=Buffer($CLICKHOUSE_DATABASE, data_01256, 1, 100, 100, /* time */ 0, 9, /* rows */ 0, 1e6 /* bytes */ - ); - insert into buffer_01256 select * from system.numbers limit 10; - select count() from data_01256; + ) " +query "insert into buffer_01256 select * from system.numbers limit 10" +query "select count() from data_01256" echo "drop" -$CLICKHOUSE_CLIENT -nm -q " - insert into buffer_01256 select * from system.numbers limit 10; - drop table if exists buffer_01256; - select count() from data_01256; -" +query "insert into buffer_01256 select * from system.numbers limit 10" +query "drop table if exists buffer_01256" +query "select count() from data_01256" -$CLICKHOUSE_CLIENT -q "drop table data_01256" +query "drop table data_01256" diff --git a/tests/queries/0_stateless/01257_dictionary_mismatch_types.sql b/tests/queries/0_stateless/01257_dictionary_mismatch_types.sql index a4bb7bf2525..38349d8291b 100644 --- a/tests/queries/0_stateless/01257_dictionary_mismatch_types.sql +++ b/tests/queries/0_stateless/01257_dictionary_mismatch_types.sql @@ -98,7 +98,7 @@ ORDER BY (col1, col2, col3, col4, col5); SET input_format_null_as_default = 1; INSERT INTO test_dict_db.table1 VALUES ('id1',1,'20200127-1',701,'20200127-1-01',0,300,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:37:59',NULL,'2020-02-04 11:35:14','2020-02-08 05:32:04',NULL,NULL,'12345'),('id1',1,'20200127-1',701,'20200127-1-01',0,300,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:37:59',NULL,'2020-02-04 11:35:14','2020-02-08 05:32:04',NULL,NULL,'12345'),('id1',1,'20200127-1',702,'20200127-1-02',0,300,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:37:59',NULL,'2020-02-04 11:35:14','2020-02-08 05:32:04',NULL,NULL,'12345'),('id1',1,'20200127-1',703,'20200127-1-03',0,300,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:37:59',NULL,'2020-02-04 11:35:14','2020-02-08 05:32:04',NULL,NULL,'12345'),('id1',1,'20200127-1',704,'20200127-1-04',0,300,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:37:59',NULL,'2020-02-04 11:35:14','2020-02-08 05:32:04',NULL,NULL,'12345'),('id1',1,'20200127-1',705,'20200127-1-05',0,300,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:37:59',NULL,'2020-02-04 11:35:14','2020-02-08 05:32:04',NULL,NULL,'12345'),('id1',1,'20200202-1',711,'20200202-1-01',0,200,NULL,'C2','Hello','C40',NULL,1,'2020-02-03 11:07:57',NULL,NULL,NULL,'2020-02-03 11:09:23',NULL,NULL),('id1',1,'20200202-2',712,'20200202-2-01',0,0,NULL,'C3','bye','R40',NULL,1,'2020-02-03 14:13:10',NULL,'2020-02-03 16:11:31','2020-02-07 05:32:05','2020-02-07 11:18:15','2020-02-07 11:18:16','123455'),('id1',1,'20200202-2',713,'20200202-2-02',0,0,NULL,'C3','bye','R40',NULL,1,'2020-02-03 14:13:10',NULL,'2020-02-03 16:11:31','2020-02-07 05:32:05','2020-02-07 11:18:15','2020-02-07 11:18:16','123455'),('id1',2,'20200128-1',701,'20200128-1-01',0,0,NULL,'N1','Hi','N40',NULL,2,'2020-02-03 17:07:27',NULL,'2020-02-05 13:33:55','2020-02-13 05:32:04',NULL,NULL,'A123755'),('id1',2,'20200131-1',701,'20200131-1-01',0,0,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 13:07:17',NULL,'2020-02-04 13:47:55','2020-02-12 05:32:04',NULL,NULL,'A123485'),('id1',2,'20200201-1',701,'20200201-1-01',0,0,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 21:07:37',NULL,'2020-02-05 13:40:51','2020-02-13 05:32:04',NULL,NULL,'A123455'),('id1',2,'20200202-1',711,'20200202-1-01',0,0,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 02:06:54',NULL,'2020-02-04 13:36:45','2020-02-12 05:32:04',NULL,NULL,'A123459'),('id1',2,'20200202-1',712,'20200202-1-02',0,0,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 02:06:54',NULL,'2020-02-04 13:36:45','2020-02-12 05:32:04',NULL,NULL,'A123429'),('id2',1,'20200131-1',401,'20200131-1-01',0,210,'2020-02-16 05:22:04','N1','Hi','N40',NULL,1,'2020-02-03 10:11:00',NULL,'2020-02-05 17:30:05','2020-02-09 05:32:05',NULL,NULL,'454545'),('id2',1,'20200131-1',402,'20200131-1-02',0,210,'2020-02-16 05:22:04','N1','Hi','N40',NULL,1,'2020-02-03 10:11:00',NULL,'2020-02-05 17:30:05','2020-02-09 05:32:05',NULL,NULL,'454545'),('id2',1,'20200131-1',403,'20200131-1-03',0,270,'2020-02-16 05:22:04','N1','Hi','N40',NULL,1,'2020-02-03 10:11:00',NULL,'2020-02-05 17:30:05','2020-02-09 05:32:05',NULL,NULL,'454545'),('id2',1,'20200131-1',404,'20200131-1-04',0,270,'2020-02-16 05:22:04','N1','Hi','N40',NULL,1,'2020-02-03 10:11:00',NULL,'2020-02-05 17:30:05','2020-02-09 05:32:05',NULL,NULL,'454545'),('id2',1,'20200131-1',405,'20200131-1-05',0,380,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:11:00',NULL,'2020-02-11 16:52:58','2020-02-15 05:32:04',NULL,NULL,'6892144935823'),('id2',1,'20200131-1',406,'20200131-1-06',0,380,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:11:00',NULL,'2020-02-11 16:52:58','2020-02-15 05:32:04',NULL,NULL,'6892144935823'),('id2',1,'20200131-1',407,'20200131-1-07',0,280,NULL,'C2','Hello','C40',NULL,1,'2020-02-03 10:11:00',NULL,NULL,NULL,'2020-02-04 11:01:21',NULL,NULL),('id2',1,'20200131-1',408,'20200131-1-08',0,0,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:11:00',NULL,'2020-02-05 17:30:05','2020-02-09 05:32:04',NULL,NULL,'454545'),('id2',1,'20200201-1',401,'20200201-1-01',0,190,'2020-02-16 05:22:05','N1','Hi','N40',NULL,1,'2020-02-03 12:06:17',NULL,'2020-02-05 17:30:30','2020-02-09 05:32:03',NULL,NULL,'90071'),('id2',1,'20200201-1',402,'20200201-1-01',0,160,'2020-02-14 05:22:13','N1','Hi','N40',NULL,1,'2020-02-03 06:21:05',NULL,'2020-02-03 17:42:35','2020-02-07 05:32:04',NULL,NULL,'96575'),('id2',1,'20200201-1',403,'20200201-1-02',0,230,'2020-02-14 05:22:13','N1','Hi','N40',NULL,1,'2020-02-03 06:21:05',NULL,'2020-02-03 17:42:35','2020-02-07 05:32:04',NULL,NULL,'96575'),('id2',1,'20200202-1',404,'20200202-1-01',0,130,'2020-02-14 05:22:14','N1','Hi','N40',NULL,1,'2020-02-03 14:00:39',NULL,'2020-02-03 17:42:45','2020-02-07 05:32:04',NULL,NULL,'96850'),('id3',1,'20200130-1',391,'20200130-1-01',0,300,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:26:46',NULL,'2020-02-05 15:33:01','2020-02-08 05:32:05',NULL,NULL,'27243'),('id3',1,'20200130-1',392,'20200130-1-02',0,300,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:26:46',NULL,'2020-02-10 16:16:11','2020-02-13 05:32:06',NULL,NULL,'92512'),('id3',1,'20200131-1',393,'20200131-1-01',0,0,NULL,'C2','Hello','C40',NULL,1,'2020-02-03 10:24:38',NULL,NULL,NULL,'2020-02-05 14:04:40',NULL,NULL),('id3',1,'20200131-2',391,'20200131-1-01',0,0,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:22:08',NULL,'2020-02-06 14:27:06','2020-02-09 05:32:04',NULL,NULL,'46433'),('id3',1,'20200131-2',392,'20200131-1-02',0,0,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:22:08',NULL,'2020-02-06 14:27:06','2020-02-09 05:32:02',NULL,NULL,'46433'); -SYSTEM RELOAD DICTIONARIES; +SYSTEM RELOAD DICTIONARY test_dict_db.table1_dict; SELECT dictGet('test_dict_db.table1_dict', 'col6', (col1, col2, col3, col4, col5)), diff --git a/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh b/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh index 713d187cd88..5f82731c54e 100755 --- a/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh +++ b/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh @@ -76,7 +76,7 @@ insert into data_01278 select reinterpretAsString(number), // s6 reinterpretAsString(number), // s7 reinterpretAsString(number) // s8 -from numbers(200000); -- { serverError 241 }" > /dev/null 2>&1 +from numbers(200000);" > /dev/null 2>&1 local ret_code=$? if [[ $ret_code -eq 0 ]]; then diff --git a/tests/queries/0_stateless/01299_alter_merge_tree.sql b/tests/queries/0_stateless/01299_alter_merge_tree.sql index 3c4467926f8..1fa354040f5 100644 --- a/tests/queries/0_stateless/01299_alter_merge_tree.sql +++ b/tests/queries/0_stateless/01299_alter_merge_tree.sql @@ -3,11 +3,11 @@ drop table if exists merge_tree; set allow_deprecated_syntax_for_merge_tree=1; create table merge_tree ( CounterID UInt32, StartDate Date, Sign Int8, VisitID UInt64, UserID UInt64, StartTime DateTime, ClickLogID UInt64) ENGINE = CollapsingMergeTree(StartDate, intHash32(UserID), tuple(CounterID, StartDate, intHash32(UserID), VisitID, ClickLogID), 8192, Sign); -insert into merge_tree values (1, '2013-09-19', 1, 0, 2, '2013-09-19 12:43:06', 3) +insert into merge_tree values (1, '2013-09-19', 1, 0, 2, '2013-09-19 12:43:06', 3); alter table merge_tree add column dummy String after CounterID; describe table merge_tree; -insert into merge_tree values (1, 'Hello, Alter Table!','2013-09-19', 1, 0, 2, '2013-09-19 12:43:06', 3) +insert into merge_tree values (1, 'Hello, Alter Table!','2013-09-19', 1, 0, 2, '2013-09-19 12:43:06', 3); select CounterID, dummy from merge_tree where dummy <> '' limit 10; diff --git a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh index 47fe7a9c7d9..d74092d828d 100755 --- a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh +++ b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh index a521accb082..bbf2fd9177a 100755 --- a/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh +++ b/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01324_insert_tsv_raw.sql b/tests/queries/0_stateless/01324_insert_tsv_raw.sql index c3812730e5b..4827f34597a 100644 --- a/tests/queries/0_stateless/01324_insert_tsv_raw.sql +++ b/tests/queries/0_stateless/01324_insert_tsv_raw.sql @@ -1,7 +1,6 @@ drop table if exists tsv_raw; create table tsv_raw (strval String, intval Int64, b1 String, b2 String, b3 String, b4 String) engine = Memory; insert into tsv_raw format TSVRaw "a 1 \ \\ "\"" "\\"" -; select * from tsv_raw format TSVRaw; select * from tsv_raw format JSONCompactEachRow; diff --git a/tests/queries/0_stateless/01338_long_select_and_alter.reference b/tests/queries/0_stateless/01338_long_select_and_alter.reference index c2678e7052e..027109252e1 100644 --- a/tests/queries/0_stateless/01338_long_select_and_alter.reference +++ b/tests/queries/0_stateless/01338_long_select_and_alter.reference @@ -1,3 +1,3 @@ -10 5 -CREATE TABLE default.alter_mt\n(\n `key` UInt64,\n `value` UInt64\n)\nENGINE = MergeTree\nORDER BY key\nSETTINGS index_granularity = 8192 +5 +CREATE TABLE default.alter_mt\n(\n `key` Int64,\n `value` Int64\n)\nENGINE = MergeTree\nORDER BY key\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01338_long_select_and_alter.sh b/tests/queries/0_stateless/01338_long_select_and_alter.sh index 2b0709162a3..5d2759ac884 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter.sh @@ -7,16 +7,16 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS alter_mt" -$CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key UInt64, value String) ENGINE=MergeTree() ORDER BY key" +$CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key Int64, value String) ENGINE=MergeTree() ORDER BY key" -$CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number, toString(number) FROM numbers(5)" +$CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number - 1 AS x, toString(x) FROM numbers(5)" $CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10000000 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & -# to be sure that select took all required locks +# To be sure that select took all required locks for better test sensitivity, although it isn't guaranteed (then the test will also succeed). sleep 2 -$CLICKHOUSE_CLIENT --query "ALTER TABLE alter_mt MODIFY COLUMN value UInt64" +$CLICKHOUSE_CLIENT --query "ALTER TABLE alter_mt MODIFY COLUMN value Int64" $CLICKHOUSE_CLIENT --query "SELECT sum(value) FROM alter_mt" diff --git a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference index b4ed8efab63..65e638bc3a4 100644 --- a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference +++ b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference @@ -1,3 +1,3 @@ -10 5 -CREATE TABLE default.alter_mt\n(\n `key` UInt64,\n `value` UInt64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/01338_long_select_and_alter_zookeeper_default/alter_mt\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 +5 +CREATE TABLE default.alter_mt\n(\n `key` Int64,\n `value` Int64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/01338_long_select_and_alter_zookeeper_default/alter_mt\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh index 41e0a12f369..593a96a7cc8 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh @@ -7,16 +7,16 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS alter_mt" -$CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key UInt64, value String) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_mt', '1') ORDER BY key" +$CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key Int64, value String) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_mt', '1') ORDER BY key" -$CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number, toString(number) FROM numbers(5)" +$CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number - 1 AS x, toString(x) FROM numbers(5)" $CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10000000 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & -# to be sure that select took all required locks +# To be sure that select took all required locks for better test sensitivity, although it isn't guaranteed (then the test will also succeed). sleep 2 -$CLICKHOUSE_CLIENT --query "ALTER TABLE alter_mt MODIFY COLUMN value UInt64" +$CLICKHOUSE_CLIENT --query "ALTER TABLE alter_mt MODIFY COLUMN value Int64" $CLICKHOUSE_CLIENT --query "SELECT sum(value) FROM alter_mt" diff --git a/tests/queries/0_stateless/01396_low_cardinality_fixed_string_default.sql b/tests/queries/0_stateless/01396_low_cardinality_fixed_string_default.sql index e31c402559e..f0c89cdf826 100644 --- a/tests/queries/0_stateless/01396_low_cardinality_fixed_string_default.sql +++ b/tests/queries/0_stateless/01396_low_cardinality_fixed_string_default.sql @@ -6,6 +6,7 @@ CREATE TABLE test ) ENGINE = MergeTree() PARTITION BY id ORDER BY id; INSERT INTO test FORMAT CSV 1,RU + INSERT INTO test FORMAT CSV 1, SELECT * FROM test ORDER BY code; diff --git a/tests/queries/0_stateless/01448_json_compact_strings_each_row.sql b/tests/queries/0_stateless/01448_json_compact_strings_each_row.sql index 06374c633e5..52f0eba8ba4 100644 --- a/tests/queries/0_stateless/01448_json_compact_strings_each_row.sql +++ b/tests/queries/0_stateless/01448_json_compact_strings_each_row.sql @@ -24,51 +24,64 @@ SELECT 5; /* Check JSONCompactStringsEachRow Input */ CREATE TABLE test_table (v1 String, v2 UInt8, v3 DEFAULT v2 * 16, v4 UInt8 DEFAULT 8) ENGINE = MergeTree() ORDER BY v2; INSERT INTO test_table FORMAT JSONCompactStringsEachRow ["first", "1", "2", "3"] ["second", "2", "3", "6"]; + SELECT * FROM test_table FORMAT JSONCompactStringsEachRow; TRUNCATE TABLE test_table; SELECT 6; /* Check input_format_null_as_default = 1 */ SET input_format_null_as_default = 1; INSERT INTO test_table FORMAT JSONCompactStringsEachRow ["first", "1", "2", "ᴺᵁᴸᴸ"] ["second", "2", "null", "6"]; + SELECT * FROM test_table FORMAT JSONCompactStringsEachRow; TRUNCATE TABLE test_table; SELECT 7; /* Check Nested */ CREATE TABLE test_table_2 (v1 UInt8, n Nested(id UInt8, name String)) ENGINE = MergeTree() ORDER BY v1; INSERT INTO test_table_2 FORMAT JSONCompactStringsEachRow ["16", "[15, 16, 17]", "['first', 'second', 'third']"]; + SELECT * FROM test_table_2 FORMAT JSONCompactStringsEachRow; TRUNCATE TABLE test_table_2; SELECT 8; /* Check JSONCompactStringsEachRowWithNames and JSONCompactStringsEachRowWithNamesAndTypes Input */ SET input_format_null_as_default = 0; INSERT INTO test_table FORMAT JSONCompactStringsEachRowWithNamesAndTypes ["v1", "v2", "v3", "v4"]["String","UInt8","UInt16","UInt8"]["first", "1", "2", "3"]["second", "2", "3", "6"]; + INSERT INTO test_table FORMAT JSONCompactStringsEachRowWithNames ["v1", "v2", "v3", "v4"]["first", "1", "2", "3"]["second", "2", "3", "6"]; + SELECT * FROM test_table FORMAT JSONCompactStringsEachRow; TRUNCATE TABLE test_table; SELECT 9; /* Check input_format_null_as_default = 1 */ SET input_format_null_as_default = 1; INSERT INTO test_table FORMAT JSONCompactStringsEachRowWithNamesAndTypes ["v1", "v2", "v3", "v4"]["String","UInt8","UInt16","UInt8"]["first", "1", "2", "null"] ["second", "2", "null", "6"]; + INSERT INTO test_table FORMAT JSONCompactStringsEachRowWithNames ["v1", "v2", "v3", "v4"]["first", "1", "2", "null"] ["second", "2", "null", "6"]; + SELECT * FROM test_table FORMAT JSONCompactStringsEachRow; SELECT 10; /* Check Header */ TRUNCATE TABLE test_table; SET input_format_skip_unknown_fields = 1; INSERT INTO test_table FORMAT JSONCompactStringsEachRowWithNamesAndTypes ["v1", "v2", "invalid_column"]["String", "UInt8", "UInt8"]["first", "1", "32"]["second", "2", "64"]; + INSERT INTO test_table FORMAT JSONCompactStringsEachRowWithNames ["v1", "v2", "invalid_column"]["first", "1", "32"]["second", "2", "64"]; + SELECT * FROM test_table FORMAT JSONCompactStringsEachRow; SELECT 11; TRUNCATE TABLE test_table; INSERT INTO test_table FORMAT JSONCompactStringsEachRowWithNamesAndTypes ["v4", "v2", "v3"]["UInt8", "UInt8", "UInt16"]["1", "2", "3"] + INSERT INTO test_table FORMAT JSONCompactStringsEachRowWithNames ["v4", "v2", "v3"]["1", "2", "3"] + SELECT * FROM test_table FORMAT JSONCompactStringsEachRowWithNamesAndTypes; SELECT '---------'; SELECT * FROM test_table FORMAT JSONCompactStringsEachRowWithNames; SELECT 12; /* Check Nested */ INSERT INTO test_table_2 FORMAT JSONCompactStringsEachRowWithNamesAndTypes ["v1", "n.id", "n.name"]["UInt8", "Array(UInt8)", "Array(String)"]["16", "[15, 16, 17]", "['first', 'second', 'third']"]; + INSERT INTO test_table_2 FORMAT JSONCompactStringsEachRowWithNames ["v1", "n.id", "n.name"]["16", "[15, 16, 17]", "['first', 'second', 'third']"]; + SELECT * FROM test_table_2 FORMAT JSONCompactStringsEachRowWithNamesAndTypes; SELECT '---------'; SELECT * FROM test_table_2 FORMAT JSONCompactStringsEachRowWithNames; diff --git a/tests/queries/0_stateless/01454_storagememory_data_race_challenge.sh b/tests/queries/0_stateless/01454_storagememory_data_race_challenge.sh index fa9238041b1..ec9c5134059 100755 --- a/tests/queries/0_stateless/01454_storagememory_data_race_challenge.sh +++ b/tests/queries/0_stateless/01454_storagememory_data_race_challenge.sh @@ -11,12 +11,17 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS mem" $CLICKHOUSE_CLIENT -q "CREATE TABLE mem (x UInt64) engine = Memory" function f { + local TIMELIMIT=$((SECONDS+$1)) for _ in $(seq 1 300); do $CLICKHOUSE_CLIENT -q "SELECT count() FROM (SELECT * FROM mem SETTINGS max_threads=2) FORMAT Null;" + if [ $SECONDS -ge "$TIMELIMIT" ]; then + break + fi done } function g { + local TIMELIMIT=$((SECONDS+$1)) for _ in $(seq 1 100); do $CLICKHOUSE_CLIENT -n -q " INSERT INTO mem SELECT number FROM numbers(1000000); @@ -30,14 +35,18 @@ function g { INSERT INTO mem VALUES (1); TRUNCATE TABLE mem; " + if [ $SECONDS -ge "$TIMELIMIT" ]; then + break + fi done } export -f f; export -f g; -timeout 20 bash -c f > /dev/null & -timeout 20 bash -c g > /dev/null & +TIMEOUT=20 +f $TIMEOUT & +g $TIMEOUT & wait $CLICKHOUSE_CLIENT -q "DROP TABLE mem" diff --git a/tests/queries/0_stateless/01508_partition_pruning_long.sh b/tests/queries/0_stateless/01508_partition_pruning_long.sh deleted file mode 100755 index 7b56d8bbf03..00000000000 --- a/tests/queries/0_stateless/01508_partition_pruning_long.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-polymorphic-parts, no-random-settings, no-random-merge-tree-settings, no-debug - -# Description of test result: -# Test the correctness of the partition pruning -# -# Script executes queries from a file 01508_partition_pruning_long.queries (1 line = 1 query) -# Queries are started with 'select' (but NOT with 'SELECT') are executed with log_level=debug - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - - -queries="${CURDIR}/01508_partition_pruning_long.queries" -while IFS= read -r sql -do - [ -z "$sql" ] && continue - if [[ "$sql" == select* ]] ; - then - echo "$sql" - ${CLICKHOUSE_CLIENT} --query "$sql" - CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=debug/g') - ${CLICKHOUSE_CLIENT} --query "$sql" 2>&1 | grep -oh "Selected .* parts by partition key, *. parts by primary key, .* marks by primary key, .* marks to read from .* ranges.*$" - CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/--send_logs_level=debug/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/g') - echo "" - else - ${CLICKHOUSE_CLIENT} --query "$sql" - fi -done < "$queries" diff --git a/tests/queries/0_stateless/01508_partition_pruning_long.reference b/tests/queries/0_stateless/01508_partition_pruning_long_1.reference similarity index 50% rename from tests/queries/0_stateless/01508_partition_pruning_long.reference rename to tests/queries/0_stateless/01508_partition_pruning_long_1.reference index afdb4257505..3ea4cc4f6ee 100644 --- a/tests/queries/0_stateless/01508_partition_pruning_long.reference +++ b/tests/queries/0_stateless/01508_partition_pruning_long_1.reference @@ -123,122 +123,3 @@ select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 2 2 20000 Selected 2/3 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges ---------- tDD ---------------------------- -select uniqExact(_part), count() from tDD where toDate(d)=toDate('2020-09-24'); -1 10000 -Selected 1/4 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() FROM tDD WHERE toDate(d) = toDate('2020-09-24'); -1 10000 -Selected 1/4 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() FROM tDD WHERE toDate(d) = '2020-09-24'; -1 10000 -Selected 1/4 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() FROM tDD WHERE toDate(d) >= '2020-09-23' and toDate(d) <= '2020-09-26'; -3 40000 -Selected 3/4 parts by partition key, 3 parts by primary key, 4/4 marks by primary key, 4 marks to read from 3 ranges - -select uniqExact(_part), count() FROM tDD WHERE toYYYYMMDD(d) >= 20200923 and toDate(d) <= '2020-09-26'; -3 40000 -Selected 3/4 parts by partition key, 3 parts by primary key, 4/4 marks by primary key, 4 marks to read from 3 ranges - ---------- sDD ---------------------------- -select uniqExact(_part), count() from sDD; -6 30000 -Selected 6/6 parts by partition key, 6 parts by primary key, 6/6 marks by primary key, 6 marks to read from 6 ranges - -select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1)+1 = 202010; -3 9999 -Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202010; -2 9999 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202110; -0 0 -Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges - -select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toStartOfDay(toDateTime(intDiv(d,1000),'UTC')) < toDateTime('2020-10-02 00:00:00','UTC'); -3 11440 -Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toDateTime(intDiv(d,1000),'UTC') < toDateTime('2020-10-01 00:00:00','UTC'); -2 10000 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from sDD where d >= 1598918400000; -4 20000 -Selected 4/6 parts by partition key, 4 parts by primary key, 4/4 marks by primary key, 4 marks to read from 4 ranges - -select uniqExact(_part), count() from sDD where d >= 1598918400000 and toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) < 202010; -3 10001 -Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges - ---------- xMM ---------------------------- -select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; -2 10000 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00'; -3 10001 -Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00'; -2 10000 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a=1; -1 1 -Selected 1/6 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; -2 5001 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; -1 5000 -Selected 1/6 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-11-01 00:00:00' and a = 1; -2 10000 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where a = 1; -3 15000 -Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from xMM where a = 66; -0 0 -Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges - -select uniqExact(_part), count() from xMM where a <> 66; -6 30000 -Selected 6/6 parts by partition key, 6 parts by primary key, 6/6 marks by primary key, 6 marks to read from 6 ranges - -select uniqExact(_part), count() from xMM where a = 2; -2 10000 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where a = 1; -2 15000 -Selected 2/5 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; -1 10000 -Selected 1/5 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from xMM where a <> 66; -5 30000 -Selected 5/5 parts by partition key, 5 parts by primary key, 5/5 marks by primary key, 5 marks to read from 5 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; -2 5001 -Selected 2/5 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; -1 5000 -Selected 1/5 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges - diff --git a/tests/queries/0_stateless/01508_partition_pruning_long_1.sh b/tests/queries/0_stateless/01508_partition_pruning_long_1.sh new file mode 100755 index 00000000000..512cf8f5265 --- /dev/null +++ b/tests/queries/0_stateless/01508_partition_pruning_long_1.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash +# Tags: long, no-polymorphic-parts, no-random-settings, no-random-merge-tree-settings, no-debug + +# Description of test result: +# Test the correctness of the partition pruning +# +# Script executes queries from a file 01508_partition_pruning_long.queries (1 line = 1 query) +# Queries are started with 'select' (but NOT with 'SELECT') are executed with log_level=debug + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +while IFS= read -r sql +do + [ -z "$sql" ] && continue + if [[ "$sql" == select* ]] ; + then + echo "$sql" + ${CLICKHOUSE_CLIENT} --query "$sql" + CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=debug/g') + ${CLICKHOUSE_CLIENT} --query "$sql" 2>&1 | grep -oh "Selected .* parts by partition key, *. parts by primary key, .* marks by primary key, .* marks to read from .* ranges.*$" + CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/--send_logs_level=debug/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/g') + echo "" + else + ${CLICKHOUSE_CLIENT} --query "$sql" + fi +done <<< " +DROP TABLE IF EXISTS tMM; + +CREATE TABLE tMM(d DateTime('Asia/Istanbul'), a Int64) ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY tuple() SETTINGS index_granularity = 8192; +SYSTEM STOP MERGES tMM; +INSERT INTO tMM SELECT toDateTime('2020-08-16 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-08-16 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-09-01 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-09-01 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-10-01 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-10-15 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); + +SELECT '--------- tMM ----------------------------'; +select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-09-15'); +select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-09-01'); +select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-10-15'); +select uniqExact(_part), count() from tMM where toDate(d)='2020-09-15'; +select uniqExact(_part), count() from tMM where toYYYYMM(d)=202009; +select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20200816; +select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20201015; +select uniqExact(_part), count() from tMM where toDate(d)='2020-10-15'; +select uniqExact(_part), count() from tMM where d >= '2020-09-01 00:00:00' and d<'2020-10-15 00:00:00'; +select uniqExact(_part), count() from tMM where d >= '2020-01-16 00:00:00' and d < toDateTime('2021-08-17 00:00:00', 'Asia/Istanbul'); +select uniqExact(_part), count() from tMM where d >= '2020-09-16 00:00:00' and d < toDateTime('2020-10-01 00:00:00', 'Asia/Istanbul'); +select uniqExact(_part), count() from tMM where d >= '2020-09-12 00:00:00' and d < '2020-10-16 00:00:00'; +select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-12 00:00:00'; +select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-09-01 00:00:00'; +select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-10-01 00:00:00'; +select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-15 00:00:00' and d < '2020-10-16 00:00:00'; +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010; +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202009; +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010 and toStartOfDay(d) = '2020-10-01 00:00:00'; +select uniqExact(_part), count() from tMM where toYYYYMM(d) >= 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; +select uniqExact(_part), count() from tMM where toYYYYMM(d) > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010 and toStartOfDay(d) < '2020-10-02 00:00:00'; +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; +select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; +select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-15'; +select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01'; +select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01' and toStartOfMonth(d) < '2020-10-01'; + +SYSTEM START MERGES tMM; +OPTIMIZE TABLE tMM FINAL; + +select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010; + +DROP TABLE tMM; +" diff --git a/tests/queries/0_stateless/01508_partition_pruning_long_2.reference b/tests/queries/0_stateless/01508_partition_pruning_long_2.reference new file mode 100644 index 00000000000..bc767f17031 --- /dev/null +++ b/tests/queries/0_stateless/01508_partition_pruning_long_2.reference @@ -0,0 +1,119 @@ +--------- tDD ---------------------------- +select uniqExact(_part), count() from tDD where toDate(d)=toDate('2020-09-24'); +1 10000 +Selected 1/4 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() FROM tDD WHERE toDate(d) = toDate('2020-09-24'); +1 10000 +Selected 1/4 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() FROM tDD WHERE toDate(d) = '2020-09-24'; +1 10000 +Selected 1/4 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() FROM tDD WHERE toDate(d) >= '2020-09-23' and toDate(d) <= '2020-09-26'; +3 40000 +Selected 3/4 parts by partition key, 3 parts by primary key, 4/4 marks by primary key, 4 marks to read from 3 ranges + +select uniqExact(_part), count() FROM tDD WHERE toYYYYMMDD(d) >= 20200923 and toDate(d) <= '2020-09-26'; +3 40000 +Selected 3/4 parts by partition key, 3 parts by primary key, 4/4 marks by primary key, 4 marks to read from 3 ranges + +--------- sDD ---------------------------- +select uniqExact(_part), count() from sDD; +6 30000 +Selected 6/6 parts by partition key, 6 parts by primary key, 6/6 marks by primary key, 6 marks to read from 6 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1)+1 = 202010; +3 9999 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202010; +2 9999 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202110; +0 0 +Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toStartOfDay(toDateTime(intDiv(d,1000),'UTC')) < toDateTime('2020-10-02 00:00:00','UTC'); +3 11440 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toDateTime(intDiv(d,1000),'UTC') < toDateTime('2020-10-01 00:00:00','UTC'); +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from sDD where d >= 1598918400000; +4 20000 +Selected 4/6 parts by partition key, 4 parts by primary key, 4/4 marks by primary key, 4 marks to read from 4 ranges + +select uniqExact(_part), count() from sDD where d >= 1598918400000 and toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) < 202010; +3 10001 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges + +--------- xMM ---------------------------- +select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00'; +3 10001 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00'; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a=1; +1 1 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; +2 5001 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; +1 5000 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-11-01 00:00:00' and a = 1; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where a = 1; +3 15000 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from xMM where a = 66; +0 0 +Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges + +select uniqExact(_part), count() from xMM where a <> 66; +6 30000 +Selected 6/6 parts by partition key, 6 parts by primary key, 6/6 marks by primary key, 6 marks to read from 6 ranges + +select uniqExact(_part), count() from xMM where a = 2; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where a = 1; +2 15000 +Selected 2/5 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; +1 10000 +Selected 1/5 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from xMM where a <> 66; +5 30000 +Selected 5/5 parts by partition key, 5 parts by primary key, 5/5 marks by primary key, 5 marks to read from 5 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; +2 5001 +Selected 2/5 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; +1 5000 +Selected 1/5 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges + diff --git a/tests/queries/0_stateless/01508_partition_pruning_long.queries b/tests/queries/0_stateless/01508_partition_pruning_long_2.sh old mode 100644 new mode 100755 similarity index 58% rename from tests/queries/0_stateless/01508_partition_pruning_long.queries rename to tests/queries/0_stateless/01508_partition_pruning_long_2.sh index 0d64fc05f0f..3f8a89bdb20 --- a/tests/queries/0_stateless/01508_partition_pruning_long.queries +++ b/tests/queries/0_stateless/01508_partition_pruning_long_2.sh @@ -1,15 +1,35 @@ -DROP TABLE IF EXISTS tMM; +#!/usr/bin/env bash +# Tags: long, no-polymorphic-parts, no-random-settings, no-random-merge-tree-settings, no-debug + +# Description of test result: +# Test the correctness of the partition pruning +# +# Script executes queries from a file 01508_partition_pruning_long.queries (1 line = 1 query) +# Queries are started with 'select' (but NOT with 'SELECT') are executed with log_level=debug + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +while IFS= read -r sql +do + [ -z "$sql" ] && continue + if [[ "$sql" == select* ]] ; + then + echo "$sql" + ${CLICKHOUSE_CLIENT} --query "$sql" + CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=debug/g') + ${CLICKHOUSE_CLIENT} --query "$sql" 2>&1 | grep -oh "Selected .* parts by partition key, *. parts by primary key, .* marks by primary key, .* marks to read from .* ranges.*$" + CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/--send_logs_level=debug/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/g') + echo "" + else + ${CLICKHOUSE_CLIENT} --query "$sql" + fi +done <<< " DROP TABLE IF EXISTS tDD; DROP TABLE IF EXISTS sDD; DROP TABLE IF EXISTS xMM; -CREATE TABLE tMM(d DateTime('Asia/Istanbul'), a Int64) ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY tuple() SETTINGS index_granularity = 8192; -SYSTEM STOP MERGES tMM; -INSERT INTO tMM SELECT toDateTime('2020-08-16 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); -INSERT INTO tMM SELECT toDateTime('2020-08-16 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); -INSERT INTO tMM SELECT toDateTime('2020-09-01 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); -INSERT INTO tMM SELECT toDateTime('2020-09-01 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); -INSERT INTO tMM SELECT toDateTime('2020-10-01 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); -INSERT INTO tMM SELECT toDateTime('2020-10-15 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); CREATE TABLE tDD(d DateTime('Asia/Istanbul'),a Int) ENGINE = MergeTree PARTITION BY toYYYYMMDD(d) ORDER BY tuple() SETTINGS index_granularity = 8192; SYSTEM STOP MERGES tDD; @@ -34,44 +54,6 @@ INSERT INTO xMM SELECT toDateTime('2020-10-01 00:00:00', 'Asia/Istanbul') + numb INSERT INTO xMM SELECT toDateTime('2020-10-15 00:00:00', 'Asia/Istanbul') + number*60, 1, number FROM numbers(5000); -SELECT '--------- tMM ----------------------------'; -select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-09-15'); -select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-09-01'); -select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-10-15'); -select uniqExact(_part), count() from tMM where toDate(d)='2020-09-15'; -select uniqExact(_part), count() from tMM where toYYYYMM(d)=202009; -select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20200816; -select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20201015; -select uniqExact(_part), count() from tMM where toDate(d)='2020-10-15'; -select uniqExact(_part), count() from tMM where d >= '2020-09-01 00:00:00' and d<'2020-10-15 00:00:00'; -select uniqExact(_part), count() from tMM where d >= '2020-01-16 00:00:00' and d < toDateTime('2021-08-17 00:00:00', 'Asia/Istanbul'); -select uniqExact(_part), count() from tMM where d >= '2020-09-16 00:00:00' and d < toDateTime('2020-10-01 00:00:00', 'Asia/Istanbul'); -select uniqExact(_part), count() from tMM where d >= '2020-09-12 00:00:00' and d < '2020-10-16 00:00:00'; -select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-12 00:00:00'; -select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-09-01 00:00:00'; -select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-10-01 00:00:00'; -select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-15 00:00:00' and d < '2020-10-16 00:00:00'; -select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010; -select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202009; -select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010 and toStartOfDay(d) = '2020-10-01 00:00:00'; -select uniqExact(_part), count() from tMM where toYYYYMM(d) >= 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; -select uniqExact(_part), count() from tMM where toYYYYMM(d) > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; -select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; -select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010 and toStartOfDay(d) < '2020-10-02 00:00:00'; -select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; -select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; -select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-15'; -select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01'; -select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01' and toStartOfMonth(d) < '2020-10-01'; - -SYSTEM START MERGES tMM; -OPTIMIZE TABLE tMM FINAL; - -select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; -select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; -select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010; - - SELECT '--------- tDD ----------------------------'; SYSTEM START MERGES tDD; OPTIMIZE TABLE tDD FINAL; @@ -116,9 +98,7 @@ select uniqExact(_part), count() from xMM where a <> 66; select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; -DROP TABLE tMM; DROP TABLE tDD; DROP TABLE sDD; DROP TABLE xMM; - - +" diff --git a/tests/queries/0_stateless/01514_input_format_csv_enum_as_number_setting.sql b/tests/queries/0_stateless/01514_input_format_csv_enum_as_number_setting.sql index 526af60434f..9e1783b7ba0 100644 --- a/tests/queries/0_stateless/01514_input_format_csv_enum_as_number_setting.sql +++ b/tests/queries/0_stateless/01514_input_format_csv_enum_as_number_setting.sql @@ -8,6 +8,7 @@ CREATE TABLE table_with_enum_column_for_csv_insert ( SET input_format_csv_enum_as_number = 1; INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2 + SELECT * FROM table_with_enum_column_for_csv_insert; SET input_format_csv_enum_as_number = 0; diff --git a/tests/queries/0_stateless/01514_input_format_json_enum_as_number.sql b/tests/queries/0_stateless/01514_input_format_json_enum_as_number.sql index 0b2cb8c64e2..a5044cd6875 100644 --- a/tests/queries/0_stateless/01514_input_format_json_enum_as_number.sql +++ b/tests/queries/0_stateless/01514_input_format_json_enum_as_number.sql @@ -8,6 +8,7 @@ CREATE TABLE table_with_enum_column_for_json_insert ( ) ENGINE=Memory(); INSERT INTO table_with_enum_column_for_json_insert FORMAT JSONEachRow {"Id":102,"Value":2} + SELECT * FROM table_with_enum_column_for_json_insert; DROP TABLE IF EXISTS table_with_enum_column_for_json_insert; diff --git a/tests/queries/0_stateless/01514_input_format_tsv_enum_as_number_setting.sql b/tests/queries/0_stateless/01514_input_format_tsv_enum_as_number_setting.sql index 033d7d282f0..5ad94eeb20e 100644 --- a/tests/queries/0_stateless/01514_input_format_tsv_enum_as_number_setting.sql +++ b/tests/queries/0_stateless/01514_input_format_tsv_enum_as_number_setting.sql @@ -8,7 +8,9 @@ CREATE TABLE table_with_enum_column_for_tsv_insert ( SET input_format_tsv_enum_as_number = 1; INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2 + INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TabSeparatedRaw 103 1 + SELECT * FROM table_with_enum_column_for_tsv_insert ORDER BY Id; SET input_format_tsv_enum_as_number = 0; diff --git a/tests/queries/0_stateless/01558_enum_as_num_in_tsv_csv_input.sql b/tests/queries/0_stateless/01558_enum_as_num_in_tsv_csv_input.sql index 6a0f2a97b4f..4dccfda4bc5 100644 --- a/tests/queries/0_stateless/01558_enum_as_num_in_tsv_csv_input.sql +++ b/tests/queries/0_stateless/01558_enum_as_num_in_tsv_csv_input.sql @@ -6,15 +6,23 @@ CREATE TABLE enum_as_num ( ) ENGINE=Memory(); INSERT INTO enum_as_num FORMAT TSV 1 1 + INSERT INTO enum_as_num FORMAT TSV 2 2 + INSERT INTO enum_as_num FORMAT TSV 3 3 + INSERT INTO enum_as_num FORMAT TSV 4 a + INSERT INTO enum_as_num FORMAT TSV 5 b INSERT INTO enum_as_num FORMAT CSV 6,1 + INSERT INTO enum_as_num FORMAT CSV 7,2 + INSERT INTO enum_as_num FORMAT CSV 8,3 + INSERT INTO enum_as_num FORMAT CSV 9,a + INSERT INTO enum_as_num FORMAT CSV 10,b SELECT * FROM enum_as_num ORDER BY Id; diff --git a/tests/queries/0_stateless/01564_test_hint_woes.reference b/tests/queries/0_stateless/01564_test_hint_woes.reference index adb4cc61816..56c07922ccb 100644 --- a/tests/queries/0_stateless/01564_test_hint_woes.reference +++ b/tests/queries/0_stateless/01564_test_hint_woes.reference @@ -20,7 +20,7 @@ select 1; insert into values_01564 values (11); -- { serverError VIOLATED_CONSTRAINT } select nonexistent column; -- { serverError UNKNOWN_IDENTIFIER } -- query after values on the same line -insert into values_01564 values (1); select 1; +insert into values_01564 values (1); select 1; 1 -- a failing insert and then a normal insert (#https://github.com/ClickHouse/ClickHouse/issues/19353) diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh index 2e47034e528..7215f270a4c 100755 --- a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh +++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: long CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -15,17 +16,26 @@ verify_sql="SELECT # In case of test failure, this code will do infinite loop and timeout. verify() { - while true + for i in {1..5000} do - result=$( $CLICKHOUSE_CLIENT -m --query="$verify_sql" ) - [ "$result" = "1" ] && break + result=$( $CLICKHOUSE_CLIENT --query="$verify_sql" ) + [ "$result" = "1" ] && echo "$result" && break sleep 0.1 + + if [[ $i -eq 5000 ]] + then + $CLICKHOUSE_CLIENT --multiquery " + SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics; + SELECT sum(active), sum(NOT active) FROM system.parts; + SELECT sum(active), sum(NOT active) FROM system.projection_parts; + SELECT count() FROM system.dropped_tables_parts; + " + fi done - echo 1 } $CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE IF EXISTS test_table" -$CLICKHOUSE_CLIENT --query="CREATE TABLE test_table(data Date) ENGINE = MergeTree PARTITION BY toYear(data) ORDER BY data;" +$CLICKHOUSE_CLIENT --query="CREATE TABLE test_table (data Date) ENGINE = MergeTree PARTITION BY toYear(data) ORDER BY data;" $CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-01')" verify diff --git a/tests/queries/0_stateless/01622_constraints_simple_optimization.reference b/tests/queries/0_stateless/01622_constraints_simple_optimization.reference index d267df2237f..84c872856ff 100644 --- a/tests/queries/0_stateless/01622_constraints_simple_optimization.reference +++ b/tests/queries/0_stateless/01622_constraints_simple_optimization.reference @@ -32,10 +32,10 @@ 1 1 0 -SELECT count() AS `count()` +SELECT count() FROM constraint_test_constants WHERE (b > 100) OR (c > 100) -SELECT count() AS `count()` +SELECT count() FROM constraint_test_constants WHERE c > 100 QUERY id: 0 @@ -53,7 +53,7 @@ QUERY id: 0 COLUMN id: 6, column_name: c, result_type: Int64, source_id: 3 CONSTANT id: 7, constant_value: UInt64_100, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 -SELECT count() AS `count()` +SELECT count() FROM constraint_test_constants WHERE c > 100 QUERY id: 0 @@ -71,7 +71,7 @@ QUERY id: 0 COLUMN id: 6, column_name: c, result_type: Int64, source_id: 3 CONSTANT id: 7, constant_value: UInt64_100, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 -SELECT count() AS `count()` +SELECT count() FROM constraint_test_constants QUERY id: 0 PROJECTION COLUMNS diff --git a/tests/queries/0_stateless/01623_constraints_column_swap.reference b/tests/queries/0_stateless/01623_constraints_column_swap.reference index 555a4c93f70..d504a86365b 100644 --- a/tests/queries/0_stateless/01623_constraints_column_swap.reference +++ b/tests/queries/0_stateless/01623_constraints_column_swap.reference @@ -1,6 +1,6 @@ SELECT - (b AS `cityHash64(a)`) + 10 AS `plus(cityHash64(a), 10)`, - (b AS b) + 3 AS `plus(b, 3)` + (b AS `cityHash64(a)`) + 10, + (b AS b) + 3 FROM column_swap_test_test WHERE b = 1 QUERY id: 0 @@ -59,8 +59,8 @@ QUERY id: 0 CONSTANT id: 14, constant_value: UInt64_1, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT - (b AS `cityHash64(a)`) + 10 AS `plus(cityHash64(a), 10)`, - (b AS b) + 3 AS `plus(b, 3)` + (b AS `cityHash64(a)`) + 10, + (b AS b) + 3 FROM column_swap_test_test WHERE b = 0 QUERY id: 0 @@ -89,8 +89,8 @@ QUERY id: 0 CONSTANT id: 14, constant_value: UInt64_0, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT - (b AS `cityHash64(a)`) + 10 AS `plus(cityHash64(a), 10)`, - (b AS b) + 3 AS `plus(b, 3)` + (b AS `cityHash64(a)`) + 10, + (b AS b) + 3 FROM column_swap_test_test WHERE b = 0 QUERY id: 0 @@ -119,8 +119,8 @@ QUERY id: 0 CONSTANT id: 14, constant_value: UInt64_0, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT - (b AS `cityHash64(a)`) + 10 AS `plus(cityHash64(a), 10)`, - (b AS b) + 3 AS `plus(b, 3)` + (b AS `cityHash64(a)`) + 10, + (b AS b) + 3 FROM column_swap_test_test WHERE b = 1 QUERY id: 0 @@ -148,7 +148,7 @@ QUERY id: 0 COLUMN id: 13, column_name: b, result_type: UInt64, source_id: 5 CONSTANT id: 14, constant_value: UInt64_1, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 -SELECT (b AS `cityHash64(a)`) + 10 AS `plus(cityHash64(a), 10)` +SELECT (b AS `cityHash64(a)`) + 10 FROM column_swap_test_test WHERE b = 0 QUERY id: 0 @@ -171,8 +171,8 @@ QUERY id: 0 CONSTANT id: 10, constant_value: UInt64_0, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT - (cityHash64(a) AS `cityHash64(a)`) + 10 AS `plus(cityHash64(a), 10)`, - a AS a + (cityHash64(a) AS `cityHash64(a)`) + 10, + a FROM column_swap_test_test WHERE cityHash64(a) = 0 QUERY id: 0 @@ -203,8 +203,8 @@ QUERY id: 0 CONSTANT id: 15, constant_value: UInt64_0, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT - (cityHash64(a) AS b) + 10 AS `plus(b, 10)`, - a AS a + (cityHash64(a) AS b) + 10, + a FROM column_swap_test_test WHERE cityHash64(a) = 0 QUERY id: 0 diff --git a/tests/queries/0_stateless/01651_lc_insert_tiny_log.reference b/tests/queries/0_stateless/01651_lc_insert_tiny_log.reference deleted file mode 100644 index 3da44c57b27..00000000000 --- a/tests/queries/0_stateless/01651_lc_insert_tiny_log.reference +++ /dev/null @@ -1,12 +0,0 @@ -10000000 -10000000 1274991808 -20000000 -20000000 2549983616 -10000000 -10000000 1274991808 -20000000 -20000000 2549983616 -10000000 -10000000 1274991808 -20000000 -20000000 2549983616 diff --git a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql deleted file mode 100644 index d405bb01fd9..00000000000 --- a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql +++ /dev/null @@ -1,48 +0,0 @@ -set allow_suspicious_low_cardinality_types=1; -drop table if exists perf_lc_num; - -CREATE TABLE perf_lc_num(  num UInt8,  arr Array(LowCardinality(Int64)) default [num]  ) ENGINE = TinyLog; - -INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); - -select sum(length(arr)) from perf_lc_num; -select sum(length(arr)), sum(num) from perf_lc_num; - -INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); - -select sum(length(arr)) from perf_lc_num; -select sum(length(arr)), sum(num) from perf_lc_num; - -drop table if exists perf_lc_num; - - -CREATE TABLE perf_lc_num(  num UInt8,  arr Array(LowCardinality(Int64)) default [num]  ) ENGINE = Log; - -INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); - -select sum(length(arr)) from perf_lc_num; -select sum(length(arr)), sum(num) from perf_lc_num; - -INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); - -select sum(length(arr)) from perf_lc_num; -select sum(length(arr)), sum(num) from perf_lc_num; - -drop table if exists perf_lc_num; - - -CREATE TABLE perf_lc_num(  num UInt8,  arr Array(LowCardinality(Int64)) default [num]  ) ENGINE = StripeLog; - -INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); - -select sum(length(arr)) from perf_lc_num; -select sum(length(arr)), sum(num) from perf_lc_num; - -INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); - -select sum(length(arr)) from perf_lc_num; -select sum(length(arr)), sum(num) from perf_lc_num; - -drop table if exists perf_lc_num; - - diff --git a/tests/queries/0_stateless/01651_lc_insert_tiny_log_1.reference b/tests/queries/0_stateless/01651_lc_insert_tiny_log_1.reference new file mode 100644 index 00000000000..0715a4212ed --- /dev/null +++ b/tests/queries/0_stateless/01651_lc_insert_tiny_log_1.reference @@ -0,0 +1,4 @@ +10000000 +10000000 1274991808 +30000000 +30000000 3824991808 diff --git a/tests/queries/0_stateless/01651_lc_insert_tiny_log_1.sql b/tests/queries/0_stateless/01651_lc_insert_tiny_log_1.sql new file mode 100644 index 00000000000..142d3b21b47 --- /dev/null +++ b/tests/queries/0_stateless/01651_lc_insert_tiny_log_1.sql @@ -0,0 +1,16 @@ +set allow_suspicious_low_cardinality_types = 1, max_rows_to_read = '31M'; +drop table if exists perf_lc_num; + +CREATE TABLE perf_lc_num( num UInt8, arr Array(LowCardinality(Int64)) default [num] ) ENGINE = TinyLog; + +INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); + +select sum(length(arr)) from perf_lc_num; +select sum(length(arr)), sum(num) from perf_lc_num; + +INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000, 20000000); + +select sum(length(arr)) from perf_lc_num; +select sum(length(arr)), sum(num) from perf_lc_num; + +drop table if exists perf_lc_num; diff --git a/tests/queries/0_stateless/01651_lc_insert_tiny_log_2.reference b/tests/queries/0_stateless/01651_lc_insert_tiny_log_2.reference new file mode 100644 index 00000000000..0715a4212ed --- /dev/null +++ b/tests/queries/0_stateless/01651_lc_insert_tiny_log_2.reference @@ -0,0 +1,4 @@ +10000000 +10000000 1274991808 +30000000 +30000000 3824991808 diff --git a/tests/queries/0_stateless/01651_lc_insert_tiny_log_2.sql b/tests/queries/0_stateless/01651_lc_insert_tiny_log_2.sql new file mode 100644 index 00000000000..e6b68e1d682 --- /dev/null +++ b/tests/queries/0_stateless/01651_lc_insert_tiny_log_2.sql @@ -0,0 +1,16 @@ +set allow_suspicious_low_cardinality_types = 1, max_rows_to_read = '31M'; +drop table if exists perf_lc_num; + +CREATE TABLE perf_lc_num( num UInt8, arr Array(LowCardinality(Int64)) default [num] ) ENGINE = Log; + +INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); + +select sum(length(arr)) from perf_lc_num; +select sum(length(arr)), sum(num) from perf_lc_num; + +INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000, 20000000); + +select sum(length(arr)) from perf_lc_num; +select sum(length(arr)), sum(num) from perf_lc_num; + +drop table if exists perf_lc_num; diff --git a/tests/queries/0_stateless/01651_lc_insert_tiny_log_3.reference b/tests/queries/0_stateless/01651_lc_insert_tiny_log_3.reference new file mode 100644 index 00000000000..0715a4212ed --- /dev/null +++ b/tests/queries/0_stateless/01651_lc_insert_tiny_log_3.reference @@ -0,0 +1,4 @@ +10000000 +10000000 1274991808 +30000000 +30000000 3824991808 diff --git a/tests/queries/0_stateless/01651_lc_insert_tiny_log_3.sql b/tests/queries/0_stateless/01651_lc_insert_tiny_log_3.sql new file mode 100644 index 00000000000..85a42163897 --- /dev/null +++ b/tests/queries/0_stateless/01651_lc_insert_tiny_log_3.sql @@ -0,0 +1,16 @@ +set allow_suspicious_low_cardinality_types = 1, max_rows_to_read = '31M'; +drop table if exists perf_lc_num; + +CREATE TABLE perf_lc_num( num UInt8, arr Array(LowCardinality(Int64)) default [num] ) ENGINE = StripeLog; + +INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); + +select sum(length(arr)) from perf_lc_num; +select sum(length(arr)), sum(num) from perf_lc_num; + +INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000, 20000000); + +select sum(length(arr)) from perf_lc_num; +select sum(length(arr)), sum(num) from perf_lc_num; + +drop table if exists perf_lc_num; diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python index 9072dfeb09f..f363cb64018 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -10,18 +10,38 @@ DEBUG_LOG = os.path.join( os.path.basename(os.path.abspath(__file__)).strip(".python") + ".debuglog", ) +STATE_MAP = { + -1: "process did not start", + 0: "completion was found", + 1: "process started and said ':)'", + 2: "completion search was started", + 3: "completion is missing", +} + def run_with_timeout(func, args, timeout): - process = multiprocessing.Process(target=func, args=args) - process.start() - process.join(timeout) + for _ in range(5): + state = multiprocessing.Value("i", -1) + process = multiprocessing.Process( + target=func, args=args, kwargs={"state": state} + ) + process.start() + process.join(timeout) - if process.is_alive(): - process.terminate() - print("Timeout") + if state.value in (0, 3): + return + + if process.is_alive(): + process.terminate() + + if state.value == -1: + continue + + print(f"Timeout, state: {STATE_MAP[state.value]}") + return -def test_completion(program, argv, comp_word): +def test_completion(program, argv, comp_word, state=None): comp_begin = comp_word[:-3] shell_pid, master = pty.fork() @@ -41,6 +61,8 @@ def test_completion(program, argv, comp_word): debug_log_fd.write(repr(output_b) + "\n") debug_log_fd.flush() + state.value = 1 + os.write(master, b"SET " + bytes(comp_begin.encode())) output_b = os.read(master, 4096) output = output_b.decode() @@ -55,23 +77,28 @@ def test_completion(program, argv, comp_word): time.sleep(0.01) os.write(master, b"\t") + state.value = 2 + output_b = os.read(master, 4096) output = output_b.decode() debug_log_fd.write(repr(output_b) + "\n") debug_log_fd.flush() - # fail fast if there is a bell character in the output, - # meaning no concise completion is found - if "\x07" in output: - print(f"{comp_word}: FAIL") - return while not comp_word in output: + # fail fast if there is a bell character in the output, + # meaning no concise completion is found + if "\x07" in output: + print(f"{comp_word}: FAIL") + state.value = 3 + return + output_b = os.read(master, 4096) output += output_b.decode() debug_log_fd.write(repr(output_b) + "\n") debug_log_fd.flush() print(f"{comp_word}: OK") + state.value = 0 finally: os.close(master) debug_log_fd.close() diff --git a/tests/queries/0_stateless/01753_direct_dictionary_simple_key.sql b/tests/queries/0_stateless/01753_direct_dictionary_simple_key.sql index 86af09f391d..93ed3f93c4e 100644 --- a/tests/queries/0_stateless/01753_direct_dictionary_simple_key.sql +++ b/tests/queries/0_stateless/01753_direct_dictionary_simple_key.sql @@ -41,7 +41,7 @@ SELECT dictGetOrDefault('01753_dictionary_db.direct_dictionary_simple_key_simple SELECT 'dictHas'; SELECT dictHas('01753_dictionary_db.direct_dictionary_simple_key_simple_attributes', number) FROM system.numbers LIMIT 4; SELECT 'select all values as input stream'; -SELECT * FROM 01753_dictionary_db.direct_dictionary_simple_key_simple_attributes; +SELECT * FROM 01753_dictionary_db.direct_dictionary_simple_key_simple_attributes ORDER BY ALL; DROP DICTIONARY 01753_dictionary_db.direct_dictionary_simple_key_simple_attributes; DROP TABLE 01753_dictionary_db.simple_key_simple_attributes_source_table; diff --git a/tests/queries/0_stateless/01821_table_comment.reference b/tests/queries/0_stateless/01821_table_comment.reference index 05acabae3d4..cdd87df43d2 100644 --- a/tests/queries/0_stateless/01821_table_comment.reference +++ b/tests/queries/0_stateless/01821_table_comment.reference @@ -1,4 +1,8 @@ -t1 this is a temtorary table +t1 this is a temporary table t2 this is a MergeTree table t3 this is a Log table -CREATE TABLE default.t1\n(\n `n` Int8\n)\nENGINE = Memory\nCOMMENT \'this is a temtorary table\' +t4 this is a Kafka table +t5 this is a EmbeddedRocksDB table +t6 this is a Executable table +t7 this is a WindowView table +CREATE TABLE default.t1\n(\n `n` Int8\n)\nENGINE = Memory\nCOMMENT \'this is a temporary table\' diff --git a/tests/queries/0_stateless/01821_table_comment.sql b/tests/queries/0_stateless/01821_table_comment.sql index 4bd71d3e278..4946e46d37a 100644 --- a/tests/queries/0_stateless/01821_table_comment.sql +++ b/tests/queries/0_stateless/01821_table_comment.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel +-- Tags: no-parallel, no-fasttest DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; @@ -9,7 +9,7 @@ CREATE TABLE t1 `n` Int8 ) ENGINE = Memory -COMMENT 'this is a temtorary table'; +COMMENT 'this is a temporary table'; CREATE TABLE t2 ( @@ -26,14 +26,57 @@ CREATE TABLE t3 ENGINE = Log COMMENT 'this is a Log table'; +CREATE TABLE t4 +( + `n` Int8 +) +ENGINE = Kafka +SETTINGS + kafka_broker_list = 'localhost:10000', + kafka_topic_list = 'test', + kafka_group_name = 'test', + kafka_format = 'JSONEachRow' +COMMENT 'this is a Kafka table'; + +CREATE TABLE t5 +( + `n` Int8 +) +ENGINE = EmbeddedRocksDB +PRIMARY KEY n +COMMENT 'this is a EmbeddedRocksDB table'; + +CREATE TABLE t6 +( + `n` Int8 +) +ENGINE = Executable('script.py', TabSeparated) +COMMENT 'this is a Executable table'; + +SET allow_experimental_window_view = 1; +-- New analyzer doesn't support WindowView tables +SET allow_experimental_analyzer = 0; + +CREATE WINDOW VIEW t7 +( + `n` Int8 +) +ENGINE MergeTree +ORDER BY n +AS SELECT 1 +GROUP BY tumble(now(), toIntervalDay('1')) +COMMENT 'this is a WindowView table'; + +SET allow_experimental_analyzer = 1; + SELECT name, comment FROM system.tables -WHERE name IN ('t1', 't2', 't3') AND database = currentDatabase() order by name; +WHERE name IN ('t1', 't2', 't3', 't4', 't5', 't6', 't7') + AND database = currentDatabase() order by name; SHOW CREATE TABLE t1; -DROP TABLE t1; -DROP TABLE t2; -DROP TABLE t3; +DROP TABLE t1, t2, t3, t4, t5, t6; +DROP VIEW t7; diff --git a/tests/queries/0_stateless/01825_type_json_1.sql b/tests/queries/0_stateless/01825_type_json_1.sql index 6876349677e..dd7c1d2b41f 100644 --- a/tests/queries/0_stateless/01825_type_json_1.sql +++ b/tests/queries/0_stateless/01825_type_json_1.sql @@ -10,6 +10,7 @@ ENGINE = MergeTree ORDER BY tuple(); SYSTEM STOP MERGES t_json; INSERT INTO t_json FORMAT JSONEachRow {"id": 1, "data": {"k1": "aa", "k2": {"k3": "bb", "k4": "c"}}} {"id": 2, "data": {"k1": "ee", "k5": "ff"}}; + INSERT INTO t_json FORMAT JSONEachRow {"id": 3, "data": {"k5":"foo"}}; SELECT id, data.k1, data.k2.k3, data.k2.k4, data.k5 FROM t_json ORDER BY id; @@ -32,6 +33,7 @@ SELECT '============'; TRUNCATE TABLE t_json; INSERT INTO t_json FORMAT JSONEachRow {"id": 1, "data": {"k1":[{"k2":"aaa","k3":[{"k4":"bbb"},{"k4":"ccc"}]},{"k2":"ddd","k3":[{"k4":"eee"},{"k4":"fff"}]}]}}; + SELECT id, data.k1.k2, data.k1.k3.k4 FROM t_json ORDER BY id; SELECT name, column, type @@ -82,4 +84,5 @@ ORDER BY name; DROP TABLE IF EXISTS t_json; -CREATE TABLE t_json(id UInt64, data Object('JSON')) ENGINE = Log; -- { serverError ILLEGAL_COLUMN } +-- Restore it after it is fixed. +-- CREATE TABLE t_json(id UInt64, data Object('JSON')) ENGINE = Log; -- { serverError ILLEGAL_COLUMN } diff --git a/tests/queries/0_stateless/01825_type_json_10.sql b/tests/queries/0_stateless/01825_type_json_10.sql index 98f1a766ed8..e13026770f6 100644 --- a/tests/queries/0_stateless/01825_type_json_10.sql +++ b/tests/queries/0_stateless/01825_type_json_10.sql @@ -7,6 +7,7 @@ DROP TABLE IF EXISTS t_json_10; CREATE TABLE t_json_10 (o JSON) ENGINE = Memory; INSERT INTO t_json_10 FORMAT JSONAsObject {"a": {"b": 1, "c": [{"d": 10, "e": [31]}, {"d": 20, "e": [63, 127]}]}} {"a": {"b": 2, "c": []}} + INSERT INTO t_json_10 FORMAT JSONAsObject {"a": {"b": 3, "c": [{"f": 20, "e": [32]}, {"f": 30, "e": [64, 128]}]}} {"a": {"b": 4, "c": []}} SELECT DISTINCT toTypeName(o) FROM t_json_10; diff --git a/tests/queries/0_stateless/01825_type_json_17.sql b/tests/queries/0_stateless/01825_type_json_17.sql index ee5cf590407..dfbeae2b1fb 100644 --- a/tests/queries/0_stateless/01825_type_json_17.sql +++ b/tests/queries/0_stateless/01825_type_json_17.sql @@ -13,6 +13,7 @@ CREATE FUNCTION hasValidSizes17 AS (arr1, arr2) -> length(arr1) = length(arr2) A SYSTEM STOP MERGES t_json_17; INSERT INTO t_json_17 FORMAT JSONAsObject {"id": 1, "arr": [{"k1": [{"k2": "aaa", "k3": "bbb"}, {"k2": "ccc"}]}]} + INSERT INTO t_json_17 FORMAT JSONAsObject {"id": 2, "arr": [{"k1": [{"k3": "ddd", "k4": 10}, {"k4": 20}], "k5": {"k6": "foo"}}]} SELECT toTypeName(obj) FROM t_json_17 LIMIT 1; @@ -24,6 +25,7 @@ SELECT obj.arr.k1.k4 FROM t_json_17 ORDER BY obj.id; TRUNCATE TABLE t_json_17; INSERT INTO t_json_17 FORMAT JSONAsObject {"id": 1, "arr": [{"k1": [{"k2": "aaa"}]}]} + INSERT INTO t_json_17 FORMAT JSONAsObject {"id": 2, "arr": [{"k1": [{"k2": "bbb", "k3": [{"k4": 10}]}, {"k2": "ccc", "k3": [{"k4": 20}]}]}]} SELECT toTypeName(obj) FROM t_json_17 LIMIT 1; @@ -35,7 +37,9 @@ SELECT obj.arr.k1.k3.k4 FROM t_json_17 ORDER BY obj.id; TRUNCATE TABLE t_json_17; INSERT INTO t_json_17 FORMAT JSONAsObject {"id": 1, "arr": [{"k3": "qqq"}, {"k3": "www"}]} + INSERT INTO t_json_17 FORMAT JSONAsObject {"id": 2, "arr": [{"k1": [{"k2": "aaa"}], "k3": "eee"}]} + INSERT INTO t_json_17 FORMAT JSONAsObject {"id": 3, "arr": [{"k1": [{"k2": "bbb", "k4": [{"k5": 10}]}, {"k2": "ccc", "k4": [{"k5": 20}]}], "k3": "rrr"}]} SELECT toTypeName(obj) FROM t_json_17 LIMIT 1; diff --git a/tests/queries/0_stateless/01825_type_json_18.sql b/tests/queries/0_stateless/01825_type_json_18.sql index b493982a12c..26bab5ff9e5 100644 --- a/tests/queries/0_stateless/01825_type_json_18.sql +++ b/tests/queries/0_stateless/01825_type_json_18.sql @@ -8,9 +8,11 @@ CREATE TABLE t_json_2(id UInt64, data Object('JSON')) ENGINE = MergeTree ORDER BY tuple(); INSERT INTO t_json_2 FORMAT JSONEachRow {"id": 1, "data" : {"k1": 1}}; + SELECT id, data, toTypeName(data) FROM t_json_2 ORDER BY id; TRUNCATE TABLE t_json_2; INSERT INTO t_json_2 FORMAT JSONEachRow {"id": 1, "data" : {"k1": [1, 2]}}; + SELECT id, data, toTypeName(data) FROM t_json_2 ORDER BY id; diff --git a/tests/queries/0_stateless/01825_type_json_9.sql b/tests/queries/0_stateless/01825_type_json_9.sql index 8fa4b335578..8f6bf2ff205 100644 --- a/tests/queries/0_stateless/01825_type_json_9.sql +++ b/tests/queries/0_stateless/01825_type_json_9.sql @@ -7,6 +7,7 @@ SET allow_experimental_object_type = 1; CREATE TABLE t_json(id UInt64, obj JSON) ENGINE = MergeTree ORDER BY id; INSERT INTO t_json format JSONEachRow {"id": 1, "obj": {"foo": 1, "k1": 2}}; + INSERT INTO t_json format JSONEachRow {"id": 2, "obj": {"foo": 1, "k2": 2}}; OPTIMIZE TABLE t_json FINAL; diff --git a/tests/queries/0_stateless/01825_type_json_in_array.sql b/tests/queries/0_stateless/01825_type_json_in_array.sql index e5c20d7ba6b..fc89bad05d4 100644 --- a/tests/queries/0_stateless/01825_type_json_in_array.sql +++ b/tests/queries/0_stateless/01825_type_json_in_array.sql @@ -6,6 +6,7 @@ DROP TABLE IF EXISTS t_json_array; CREATE TABLE t_json_array (id UInt32, arr Array(JSON)) ENGINE = MergeTree ORDER BY id; INSERT INTO t_json_array FORMAT JSONEachRow {"id": 1, "arr": [{"k1": 1, "k2": {"k3": 2, "k4": 3}}, {"k1": 2, "k2": {"k5": "foo"}}]} + INSERT INTO t_json_array FORMAT JSONEachRow {"id": 2, "arr": [{"k1": 3, "k2": {"k3": 4, "k4": 5}}]} SET output_format_json_named_tuples_as_objects = 1; @@ -18,6 +19,7 @@ SELECT toTypeName(arr) FROM t_json_array LIMIT 1; TRUNCATE TABLE t_json_array; INSERT INTO t_json_array FORMAT JSONEachRow {"id": 1, "arr": [{"k1": [{"k2": "aaa", "k3": "bbb"}, {"k2": "ccc"}]}]} + INSERT INTO t_json_array FORMAT JSONEachRow {"id": 2, "arr": [{"k1": [{"k3": "ddd", "k4": 10}, {"k4": 20}], "k5": {"k6": "foo"}}]} SELECT * FROM t_json_array ORDER BY id FORMAT JSONEachRow; diff --git a/tests/queries/0_stateless/01825_type_json_insert_select.sql b/tests/queries/0_stateless/01825_type_json_insert_select.sql index cd0b280c360..0c83a35cce3 100644 --- a/tests/queries/0_stateless/01825_type_json_insert_select.sql +++ b/tests/queries/0_stateless/01825_type_json_insert_select.sql @@ -9,12 +9,14 @@ CREATE TABLE type_json_src (id UInt32, data JSON) ENGINE = MergeTree ORDER BY id CREATE TABLE type_json_dst AS type_json_src; INSERT INTO type_json_src VALUES (1, '{"k1": 1, "k2": "foo"}'); + INSERT INTO type_json_dst SELECT * FROM type_json_src; SELECT DISTINCT toTypeName(data) FROM type_json_dst; SELECT id, data FROM type_json_dst ORDER BY id; INSERT INTO type_json_src VALUES (2, '{"k1": 2, "k2": "bar"}') (3, '{"k1": 3, "k3": "aaa"}'); + INSERT INTO type_json_dst SELECT * FROM type_json_src WHERE id > 1; SELECT DISTINCT toTypeName(data) FROM type_json_dst; @@ -23,10 +25,12 @@ SELECT id, data FROM type_json_dst ORDER BY id; INSERT INTO type_json_dst VALUES (4, '{"arr": [{"k11": 5, "k22": 6}, {"k11": 7, "k33": 8}]}'); INSERT INTO type_json_src VALUES (5, '{"arr": "not array"}'); + INSERT INTO type_json_dst SELECT * FROM type_json_src WHERE id = 5; -- { serverError INCOMPATIBLE_COLUMNS } TRUNCATE TABLE type_json_src; -INSERT INTO type_json_src VALUES (5, '{"arr": [{"k22": "str1"}]}') +INSERT INTO type_json_src VALUES (5, '{"arr": [{"k22": "str1"}]}'); + INSERT INTO type_json_dst SELECT * FROM type_json_src WHERE id = 5; SELECT DISTINCT toTypeName(data) FROM type_json_dst; @@ -45,6 +49,7 @@ SET max_insert_threads = 1; SET output_format_json_named_tuples_as_objects = 1; INSERT INTO type_json_src FORMAT JSONAsString {"k1": 1, "k10": [{"a": "1", "b": "2"}, {"a": "2", "b": "3"}]}; + INSERT INTO type_json_src FORMAT JSONAsString {"k1": 2, "k10": [{"a": "1", "b": "2", "c": {"k11": "haha"}}]}; INSERT INTO type_json_dst SELECT data FROM type_json_src; @@ -57,6 +62,7 @@ TRUNCATE TABLE type_json_dst; -- Insert in another order. Order is important, because a way how defaults are filled differs. INSERT INTO type_json_src FORMAT JSONAsString {"k1": 2, "k10": [{"a": "1", "b": "2", "c": {"k11": "haha"}}]}; + INSERT INTO type_json_src FORMAT JSONAsString {"k1": 1, "k10": [{"a": "1", "b": "2"}, {"a": "2", "b": "3"}]}; INSERT INTO type_json_dst SELECT data FROM type_json_src; diff --git a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh index 4d7e79fae52..a6b83eba27d 100755 --- a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh +++ b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE IF EXISTS t; CREATE TABLE t (x UInt64) ENGINE = Memory;" # Rate limit is chosen for operation to spent more than one second. -seq 1 1000 | pv --quiet --rate-limit 500 | ${CLICKHOUSE_CLIENT} --query "INSERT INTO t FORMAT TSV" +seq 1 1000 | pv --quiet --rate-limit 400 | ${CLICKHOUSE_CLIENT} --query "INSERT INTO t FORMAT TSV" # We check that the value of NetworkReceiveElapsedMicroseconds correctly includes the time spent waiting data from the client. ${CLICKHOUSE_CLIENT} --multiquery --query "SYSTEM FLUSH LOGS; diff --git a/tests/queries/0_stateless/01925_json_as_string_data_in_square_brackets.sql b/tests/queries/0_stateless/01925_json_as_string_data_in_square_brackets.sql index 3b1e5176ff9..6e6f306c109 100644 --- a/tests/queries/0_stateless/01925_json_as_string_data_in_square_brackets.sql +++ b/tests/queries/0_stateless/01925_json_as_string_data_in_square_brackets.sql @@ -3,8 +3,11 @@ DROP TABLE IF EXISTS json_square_brackets; CREATE TABLE json_square_brackets (field String) ENGINE = Memory; INSERT INTO json_square_brackets FORMAT JSONAsString [{"id": 1, "name": "name1"}, {"id": 2, "name": "name2"}]; + INSERT INTO json_square_brackets FORMAT JSONAsString[]; + INSERT INTO json_square_brackets FORMAT JSONAsString [ ] ; + INSERT INTO json_square_brackets FORMAT JSONEachRow ; SELECT * FROM json_square_brackets; diff --git a/tests/queries/0_stateless/01943_query_id_check.sql b/tests/queries/0_stateless/01943_query_id_check.sql index ad9e88e0478..74c2dad8897 100644 --- a/tests/queries/0_stateless/01943_query_id_check.sql +++ b/tests/queries/0_stateless/01943_query_id_check.sql @@ -16,7 +16,7 @@ SELECT query FROM system.query_log WHERE initial_query_id = (SELECT * FROM tmp) DROP TABLE tmp; CREATE TABLE tmp (str String) ENGINE = Log; -INSERT INTO tmp (*) VALUES ('a') +INSERT INTO tmp (*) VALUES ('a'); SELECT count() FROM (SELECT initialQueryID() FROM remote('127.0.0.{1..3}', currentDatabase(), 'tmp') GROUP BY queryID()); SELECT count() FROM (SELECT queryID() FROM remote('127.0.0.{1..3}', currentDatabase(), 'tmp') GROUP BY queryID()); SELECT count() FROM (SELECT queryID() AS t FROM remote('127.0.0.{1..3}', currentDatabase(), 'tmp') GROUP BY queryID() HAVING t == initialQueryID()); diff --git a/tests/queries/0_stateless/02002_parse_map_int_key.sql b/tests/queries/0_stateless/02002_parse_map_int_key.sql index 8d009037dd7..c4b48e3d2b6 100644 --- a/tests/queries/0_stateless/02002_parse_map_int_key.sql +++ b/tests/queries/0_stateless/02002_parse_map_int_key.sql @@ -2,7 +2,6 @@ DROP TABLE IF EXISTS t_map_int_key; CREATE TABLE t_map_int_key (m1 Map(UInt32, UInt32), m2 Map(Date, UInt32)) ENGINE = Memory; INSERT INTO t_map_int_key FORMAT CSV "{1:2, 3: 4, 5 :6, 7 : 8}","{'2021-05-20':1, '2021-05-21': 2, '2021-05-22' :3, '2021-05-23' : 4}" -; SELECT m1, m2 FROM t_map_int_key; diff --git a/tests/queries/0_stateless/02050_client_profile_events.sh b/tests/queries/0_stateless/02050_client_profile_events.sh index 05e48de771d..e20590a6d45 100755 --- a/tests/queries/0_stateless/02050_client_profile_events.sh +++ b/tests/queries/0_stateless/02050_client_profile_events.sh @@ -21,11 +21,17 @@ echo 'regression test for overlap profile events snapshots between queries (clic $CLICKHOUSE_LOCAL --print-profile-events --profile-events-delay-ms=-1 -n -q 'select 1; select 1' |& grep -F -o '[ 0 ] SelectedRows: 1 (increment)' echo 'print everything' -profile_events="$($CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events -q 'select sleep(1) from numbers(2) format Null' |& grep -c 'SelectedRows')" +profile_events="$( + $CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events -q 'select sleep(1) from numbers(2) format Null' |& + $CLICKHOUSE_LOCAL --input-format LineAsString --query "SELECT sum(extract(line, 'SelectedRows: (\\d+)')::UInt64) FROM table WHERE line LIKE '%SelectedRows: %'" +)" test "$profile_events" -gt 1 && echo OK || echo "FAIL ($profile_events)" echo 'print each 100 ms' -profile_events="$($CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events --profile-events-delay-ms=100 -q 'select sleep(0.2) from numbers(10) format Null' |& grep -c 'SelectedRows')" +profile_events="$( + $CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events --profile-events-delay-ms=100 -q 'select sleep(0.2) from numbers(10) format Null' |& + $CLICKHOUSE_LOCAL --input-format LineAsString --query "SELECT sum(extract(line, 'SelectedRows: (\\d+)')::UInt64) FROM table WHERE line LIKE '%SelectedRows: %'" +)" test "$profile_events" -gt 1 && echo OK || echo "FAIL ($profile_events)" echo 'check that ProfileEvents is new for each query' diff --git a/tests/queries/0_stateless/02105_backslash_letter_commands.expect b/tests/queries/0_stateless/02105_backslash_letter_commands.expect index 7e78b7e7280..f09e9613a87 100755 --- a/tests/queries/0_stateless/02105_backslash_letter_commands.expect +++ b/tests/queries/0_stateless/02105_backslash_letter_commands.expect @@ -33,7 +33,6 @@ expect "Syntax error: " expect ":) " send -- " \\l ; \\d; \r" -expect "Syntax error (Multi-statements are not allowed): " expect ":) " send -- " \\l ;\r" diff --git a/tests/queries/0_stateless/02114_bool_type.sql b/tests/queries/0_stateless/02114_bool_type.sql index d4ea4e54028..6ff9206bb59 100644 --- a/tests/queries/0_stateless/02114_bool_type.sql +++ b/tests/queries/0_stateless/02114_bool_type.sql @@ -15,22 +15,31 @@ SELECT value,f FROM bool_test where value > 0; set bool_true_representation='True'; set bool_false_representation='False'; + INSERT INTO bool_test (value,f) FORMAT CSV True,test + INSERT INTO bool_test (value,f) FORMAT TSV False test + SELECT value,f FROM bool_test order by value FORMAT CSV; SELECT value,f FROM bool_test order by value FORMAT TSV; set bool_true_representation='Yes'; set bool_false_representation='No'; + INSERT INTO bool_test (value,f) FORMAT CSV Yes,test + INSERT INTO bool_test (value,f) FORMAT TSV No test + SELECT value,f FROM bool_test order by value FORMAT CSV; SELECT value,f FROM bool_test order by value FORMAT TSV; set bool_true_representation='On'; set bool_false_representation='Off'; + INSERT INTO bool_test (value,f) FORMAT CSV On,test + INSERT INTO bool_test (value,f) FORMAT TSV Off test + SELECT value,f FROM bool_test order by value FORMAT CSV; SELECT value,f FROM bool_test order by value FORMAT TSV; diff --git a/tests/queries/0_stateless/02184_ipv6_select_parsing.sql b/tests/queries/0_stateless/02184_ipv6_select_parsing.sql index 2892de309c4..ba5399555f8 100644 --- a/tests/queries/0_stateless/02184_ipv6_select_parsing.sql +++ b/tests/queries/0_stateless/02184_ipv6_select_parsing.sql @@ -2,8 +2,11 @@ drop table if exists ips_v6; create table ips_v6(i IPv6) Engine=Memory; INSERT INTO ips_v6 SELECT toIPv6('::ffff:127.0.0.1'); + INSERT INTO ips_v6 values ('::ffff:127.0.0.1'); + INSERT INTO ips_v6 FORMAT TSV ::ffff:127.0.0.1 + INSERT INTO ips_v6 SELECT ('::ffff:127.0.0.1'); SELECT * FROM ips_v6; diff --git a/tests/queries/0_stateless/02192_comment_error.reference b/tests/queries/0_stateless/02192_comment_error.reference index 21da4d2be9e..5f097eb8df6 100644 --- a/tests/queries/0_stateless/02192_comment_error.reference +++ b/tests/queries/0_stateless/02192_comment_error.reference @@ -1,5 +1,5 @@ -OK -OK +FAIL +FAIL OK OK OK diff --git a/tests/queries/0_stateless/02192_comment_error.sh b/tests/queries/0_stateless/02192_comment_error.sh index 78ff474ae84..d48c95a6419 100755 --- a/tests/queries/0_stateless/02192_comment_error.sh +++ b/tests/queries/0_stateless/02192_comment_error.sh @@ -4,8 +4,10 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh +# just stop, no exception ${CLICKHOUSE_CLIENT} --query="#" 2>&1 | grep -F -q 'Syntax error' && echo 'OK' || echo 'FAIL' ${CLICKHOUSE_CLIENT} --query="#not a comemnt" 2>&1 | grep -F -q 'Syntax error' && echo 'OK' || echo 'FAIL' +# syntax error ${CLICKHOUSE_CLIENT} --query="select 1 #not a comemnt" 2>&1 | grep -F -q 'Syntax error' && echo 'OK' || echo 'FAIL' ${CLICKHOUSE_CLIENT} --query="select 1 #" 2>&1 | grep -F -q 'Syntax error' && echo 'OK' || echo 'FAIL' ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "select 42 #" 2>&1 | grep -F -q 'Syntax error' && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/02193_async_insert_tcp_client_1.sql b/tests/queries/0_stateless/02193_async_insert_tcp_client_1.sql index 86db9d04aa0..a232a4196db 100644 --- a/tests/queries/0_stateless/02193_async_insert_tcp_client_1.sql +++ b/tests/queries/0_stateless/02193_async_insert_tcp_client_1.sql @@ -6,9 +6,7 @@ DROP TABLE IF EXISTS t_async_insert_02193_1; CREATE TABLE t_async_insert_02193_1 (id UInt32, s String) ENGINE = Memory; -INSERT INTO t_async_insert_02193_1 SETTINGS async_insert = 1 FORMAT CSV -1,aaa -; +INSERT INTO t_async_insert_02193_1 SETTINGS async_insert = 1 FORMAT CSV 1,aaa INSERT INTO t_async_insert_02193_1 SETTINGS async_insert = 1 FORMAT Values (2, 'bbb'); diff --git a/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql b/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql index 3d6b69fe161..7cc267b7263 100644 --- a/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql +++ b/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql @@ -1,4 +1,4 @@ --- Tags: long +-- Tags: long, no-tsan drop table if exists buffer_02231; drop table if exists out_02231; @@ -11,8 +11,8 @@ create table buffer_02231 ( key Int, v1 AggregateFunction(groupArray, String) -) engine=Buffer(currentDatabase(), 'out_02231', - /* layers= */1, +) engine = Buffer(currentDatabase(), 'out_02231', + /* layers= */ 1, /* min/max time */ 86400, 86400, /* min/max rows */ 1e9, 1e9, /* min/max bytes */ 1e12, 1e12, @@ -29,7 +29,7 @@ from in_02231 group by key; set optimize_trivial_insert_select = 1; -insert into in_02231 select * from numbers(10e6) settings max_memory_usage='400Mi', max_threads=1; +insert into in_02231 select * from numbers(5e6) settings max_memory_usage='400Mi', max_threads=1; drop table buffer_02231; drop table out_02231; diff --git a/tests/queries/0_stateless/02234_clickhouse_local_test_mode.reference b/tests/queries/0_stateless/02234_clickhouse_local_test_mode.reference index d3605d15a3d..2c94e483710 100644 --- a/tests/queries/0_stateless/02234_clickhouse_local_test_mode.reference +++ b/tests/queries/0_stateless/02234_clickhouse_local_test_mode.reference @@ -1,2 +1,2 @@ OK -Missing columns +OK diff --git a/tests/queries/0_stateless/02234_clickhouse_local_test_mode.sh b/tests/queries/0_stateless/02234_clickhouse_local_test_mode.sh index eb4a91bd850..a6c47d80fa9 100755 --- a/tests/queries/0_stateless/02234_clickhouse_local_test_mode.sh +++ b/tests/queries/0_stateless/02234_clickhouse_local_test_mode.sh @@ -5,5 +5,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_LOCAL --query="SELECT n SETTINGS allow_experimental_analyzer = 1" 2>&1 | grep -q "Code: 47. DB::Exception:" && echo 'OK' || echo 'FAIL' ||: -$CLICKHOUSE_LOCAL --query="SELECT n SETTINGS allow_experimental_analyzer = 0 -- { serverError 47 }" 2>&1 | grep -o 'Missing columns' +$CLICKHOUSE_LOCAL --query="SELECT n SETTINGS allow_experimental_analyzer = 1" 2>&1 | grep -q "Code: 47. DB::Exception:" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_LOCAL --query="SELECT n SETTINGS allow_experimental_analyzer = 0" 2>&1 | grep -q "Code: 47. DB::Exception:" && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh b/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh index 0b6b9f461b0..ffc38c0c1bd 100755 --- a/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh +++ b/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh @@ -28,7 +28,7 @@ ORDER BY tuple(); INSERT INTO t_01411_num (num) SELECT number % 1000 FROM numbers(100000); -create table lc_dict_reading (val UInt64, str StringWithDictionary, pat String) engine = MergeTree order by val; +create table lc_dict_reading (val UInt64, str LowCardinality(String), pat String) engine = MergeTree order by val; insert into lc_dict_reading select number, if(number < 8192 * 4, number % 100, number) as s, s from system.numbers limit 100000; """ diff --git a/tests/queries/0_stateless/02240_asof_join_biginteger.reference b/tests/queries/0_stateless/02240_asof_join_biginteger.reference index cac55eec430..f7eb4d74375 100644 --- a/tests/queries/0_stateless/02240_asof_join_biginteger.reference +++ b/tests/queries/0_stateless/02240_asof_join_biginteger.reference @@ -2,3 +2,7 @@ 0 340282366920938463463374607431768211457 0 18446744073709551617 0 340282366920938463463374607431768211457 +0 18446744073709551617 +0 340282366920938463463374607431768211457 +0 18446744073709551617 +0 340282366920938463463374607431768211457 diff --git a/tests/queries/0_stateless/02240_asof_join_biginteger.sql b/tests/queries/0_stateless/02240_asof_join_biginteger.sql index 6dc5b00f116..a5c1faae4ea 100644 --- a/tests/queries/0_stateless/02240_asof_join_biginteger.sql +++ b/tests/queries/0_stateless/02240_asof_join_biginteger.sql @@ -3,3 +3,11 @@ select * from (select 0 as k, toInt256('340282366920938463463374607431768211457' select * from (select 0 as k, toUInt128('18446744073709551617') as v) t1 asof join (select 0 as k, toUInt128('18446744073709551616') as v) t2 using(k, v); select * from (select 0 as k, toUInt256('340282366920938463463374607431768211457') as v) t1 asof join (select 0 as k, toUInt256('340282366920938463463374607431768211456') as v) t2 using(k, v); + +SET join_algorithm = 'full_sorting_merge'; + +select * from (select 0 as k, toInt128('18446744073709551617') as v) t1 asof join (select 0 as k, toInt128('18446744073709551616') as v) t2 using(k, v); +select * from (select 0 as k, toInt256('340282366920938463463374607431768211457') as v) t1 asof join (select 0 as k, toInt256('340282366920938463463374607431768211456') as v) t2 using(k, v); + +select * from (select 0 as k, toUInt128('18446744073709551617') as v) t1 asof join (select 0 as k, toUInt128('18446744073709551616') as v) t2 using(k, v); +select * from (select 0 as k, toUInt256('340282366920938463463374607431768211457') as v) t1 asof join (select 0 as k, toUInt256('340282366920938463463374607431768211456') as v) t2 using(k, v); diff --git a/tests/queries/0_stateless/02241_join_rocksdb_bs.reference b/tests/queries/0_stateless/02241_join_rocksdb_bs.reference index 8416a2991c1..4dff9ef38ef 100644 --- a/tests/queries/0_stateless/02241_join_rocksdb_bs.reference +++ b/tests/queries/0_stateless/02241_join_rocksdb_bs.reference @@ -10,59 +10,3 @@ 1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 diff --git a/tests/queries/0_stateless/02241_join_rocksdb_bs.sql.j2 b/tests/queries/0_stateless/02241_join_rocksdb_bs.sql.j2 index 6121db6d6a2..e5703f99d62 100644 --- a/tests/queries/0_stateless/02241_join_rocksdb_bs.sql.j2 +++ b/tests/queries/0_stateless/02241_join_rocksdb_bs.sql.j2 @@ -1,4 +1,4 @@ --- Tags: use-rocksdb, long +-- Tags: use-rocksdb, long, no-s3-storage SET join_algorithm = 'direct'; @@ -13,27 +13,21 @@ INSERT INTO rdb_{{ table_size }} SELECT (sipHash64(number) % {{ table_size }}) as key, ('val' || toString(key)) AS value FROM numbers_mt({{ table_size }}); -{% for block_size in [10, 11, 128, 129, 65505, 65506, 70000] -%} - -{% if block_size * 5000 > table_size -%} - -SET max_block_size = {{ block_size }}; - {% for right_size in [table_size // 2, table_size + table_size // 4 + 1] -%} SELECT count() == (SELECT count() FROM rdb_{{ table_size }} WHERE key < {{ right_size }}) FROM (SELECT number as k FROM numbers_mt({{ right_size }})) as t1 INNER JOIN rdb_{{ table_size }} as rdb -ON rdb.key == t1.k; +ON rdb.key == t1.k +{% if table_size < 100 %}SETTINGS max_block_size = 1{% endif -%} +; SELECT count() == {{ right_size }} and countIf(value != '') == (SELECT count() FROM rdb_{{ table_size }} WHERE key < {{ right_size }}) FROM (SELECT number as k FROM numbers_mt({{ right_size }})) as t1 LEFT JOIN rdb_{{ table_size }} as rdb -ON rdb.key == t1.k; - -{% endfor -%} - -{% endif -%} +ON rdb.key == t1.k +{% if table_size < 100 %}SETTINGS max_block_size = 1{% endif -%} +; {% endfor -%} {% endfor -%} @@ -41,4 +35,3 @@ ON rdb.key == t1.k; {% for table_size in [10, 65555, 100000] -%} DROP TABLE IF EXISTS rdb_{{ table_size }}; {% endfor -%} - diff --git a/tests/queries/0_stateless/02242_subcolumns_sizes.sql b/tests/queries/0_stateless/02242_subcolumns_sizes.sql index 8c3d8e69238..d29241131d3 100644 --- a/tests/queries/0_stateless/02242_subcolumns_sizes.sql +++ b/tests/queries/0_stateless/02242_subcolumns_sizes.sql @@ -9,6 +9,7 @@ ENGINE = MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 0; INSERT INTO t_subcolumns_sizes FORMAT JSONEachRow {"id": 1, "arr": [1, 2, 3], "n": null, "d": {"k1": "v1", "k2": [{"k3": 1, "k4": "v2"}, {"k3": 3}]}} + INSERT INTO t_subcolumns_sizes FORMAT JSONEachRow {"id": 2, "arr": [0], "n": "foo", "d": {"k1": "v3", "k2": [{"k4": "v4"}, {"k3": "v5", "k5": 5}]}} OPTIMIZE TABLE t_subcolumns_sizes FINAL; diff --git a/tests/queries/0_stateless/02249_insert_select_from_input_schema_inference.sql b/tests/queries/0_stateless/02249_insert_select_from_input_schema_inference.sql index eb83309f117..a0c6701faae 100644 --- a/tests/queries/0_stateless/02249_insert_select_from_input_schema_inference.sql +++ b/tests/queries/0_stateless/02249_insert_select_from_input_schema_inference.sql @@ -3,5 +3,6 @@ set use_structure_from_insertion_table_in_table_functions = 1; drop table if exists test_02249; create table test_02249 (x UInt32, y String) engine=Memory(); insert into test_02249 select * from input() format JSONEachRow {"x" : 1, "y" : "string1"}, {"y" : "string2", "x" : 2}; + select * from test_02249; drop table test_02249; diff --git a/tests/queries/0_stateless/02267_jsonlines_ndjson_format.sql b/tests/queries/0_stateless/02267_jsonlines_ndjson_format.sql index 5e32758b328..5dfb17da98c 100644 --- a/tests/queries/0_stateless/02267_jsonlines_ndjson_format.sql +++ b/tests/queries/0_stateless/02267_jsonlines_ndjson_format.sql @@ -6,6 +6,7 @@ DROP TABLE IF EXISTS 02267_t; CREATE TABLE 02267_t (n1 UInt32, n2 UInt32) ENGINE = Memory; INSERT INTO 02267_t FORMAT JSONLines {"n1": 1, "n2": 2} {"n1": 3, "n2": 4} {"n1": 5, "n2": 6}; + INSERT INTO 02267_t FORMAT NDJSON {"n1": 1, "n2": 2} {"n1": 3, "n2": 4} {"n1": 5, "n2": 6}; SELECT * FROM 02267_t ORDER BY n1, n2 FORMAT JSONLines; diff --git a/tests/queries/0_stateless/02276_full_sort_join_unsupported.sql b/tests/queries/0_stateless/02276_full_sort_join_unsupported.sql index a4e60ff54dd..0b10101d8f2 100644 --- a/tests/queries/0_stateless/02276_full_sort_join_unsupported.sql +++ b/tests/queries/0_stateless/02276_full_sort_join_unsupported.sql @@ -19,8 +19,6 @@ SELECT * FROM t1 ANTI JOIN t2 ON t1.key = t2.key; -- { serverError NOT_IMPLEMENT SELECT * FROM t1 SEMI JOIN t2 ON t1.key = t2.key; -- { serverError NOT_IMPLEMENTED } -SELECT * FROM t1 ASOF JOIN t2 ON t1.key = t2.key AND t1.val > t2.val; -- { serverError NOT_IMPLEMENTED } - SELECT * FROM t1 ANY JOIN t2 ON t1.key = t2.key SETTINGS any_join_distinct_right_table_keys = 1; -- { serverError NOT_IMPLEMENTED } SELECT * FROM t1 JOIN t2 USING (key) SETTINGS join_use_nulls = 1; -- { serverError NOT_IMPLEMENTED } diff --git a/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.expect b/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.expect index bde00b306cf..d5b2a278180 100755 --- a/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.expect +++ b/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.expect @@ -1,4 +1,6 @@ #!/usr/bin/expect -f +# Tags: no-debug, no-tsan, no-msan, no-asan, no-ubsan, no-s3-storage +# ^ it can be slower than 60 seconds # This is the regression for the concurrent access in ProgressIndication, # so it is important to read enough rows here (10e6). diff --git a/tests/queries/0_stateless/02346_non_negative_derivative.sql b/tests/queries/0_stateless/02346_non_negative_derivative.sql index 704241da16c..ab648f2ee73 100644 --- a/tests/queries/0_stateless/02346_non_negative_derivative.sql +++ b/tests/queries/0_stateless/02346_non_negative_derivative.sql @@ -58,8 +58,8 @@ SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 13 YEAR) OVER (PAR -- test against wrong arguments/types SELECT ts, metric, nonNegativeDerivative(metric, 1, INTERVAL 3 NANOSECOND) OVER (PARTITION BY metric ORDER BY ts, metric ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError BAD_ARGUMENTS } SELECT ts, metric, nonNegativeDerivative('string not datetime', ts, INTERVAL 3 NANOSECOND) OVER (PARTITION BY metric ORDER BY ts, metric ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError BAD_ARGUMENTS } -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 3 NANOSECOND, id) OVER (PARTITION BY metric ORDER BY ts, metric ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError BAD_ARGUMENTS } -SELECT ts, metric, nonNegativeDerivative(metric) OVER (PARTITION BY metric ORDER BY ts, metric ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError BAD_ARGUMENTS } +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 3 NANOSECOND, id) OVER (PARTITION BY metric ORDER BY ts, metric ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT ts, metric, nonNegativeDerivative(metric) OVER (PARTITION BY metric ORDER BY ts, metric ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -- cleanup DROP TABLE IF EXISTS nnd; diff --git a/tests/queries/0_stateless/02366_kql_summarize.sql b/tests/queries/0_stateless/02366_kql_summarize.sql index ca16bc3a755..1480d10e14a 100644 --- a/tests/queries/0_stateless/02366_kql_summarize.sql +++ b/tests/queries/0_stateless/02366_kql_summarize.sql @@ -40,7 +40,7 @@ create table Dates EventTime DateTime, ) ENGINE = Memory; -Insert into Dates VALUES ('2015-10-12') , ('2016-10-12') +Insert into Dates VALUES ('2015-10-12') , ('2016-10-12'); Select '-- test summarize --' ; set dialect='kusto'; Customers | summarize count(), min(Age), max(Age), avg(Age), sum(Age); diff --git a/tests/queries/0_stateless/02372_analyzer_join.reference b/tests/queries/0_stateless/02372_analyzer_join.reference index eefcb1e50dc..9204dded262 100644 --- a/tests/queries/0_stateless/02372_analyzer_join.reference +++ b/tests/queries/0_stateless/02372_analyzer_join.reference @@ -5,63 +5,63 @@ JOIN INNER SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value -FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; +FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 SELECT '--'; -- SELECT t1.value, t2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_1_Value_1 Join_2_Value_1 -SELECT id FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError AMBIGUOUS_IDENTIFIER } -SELECT value FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT id FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT value FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0'; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 SELECT 'JOIN expression aliases'; JOIN expression aliases -SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id); +SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 SELECT '--'; -- -SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id; +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 SELECT 'JOIN LEFT'; @@ -69,75 +69,75 @@ JOIN LEFT SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value -FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; +FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 2 Join_1_Value_2 0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 2 Join_1_Value_2 0 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value -FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 2 2 Join_1_Value_2 Join_1_Value_2 0 0 SELECT '--'; -- SELECT t1.value, t2.value -FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_1_Value_1 Join_2_Value_1 Join_1_Value_2 -SELECT id FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError AMBIGUOUS_IDENTIFIER } -SELECT value FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT id FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT value FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0'; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 0 2 Join_1_Value_2 0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 0 2 Join_1_Value_2 0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 0 2 Join_1_Value_2 0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id; +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 2 Join_1_Value_2 0 SELECT 'JOIN expression aliases'; JOIN expression aliases -SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id); +SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 2 Join_1_Value_2 0 SELECT '--'; -- -SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id; +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 2 Join_1_Value_2 0 @@ -146,182 +146,182 @@ JOIN RIGHT SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value -FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; +FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 -0 3 Join_2_Value_3 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 -0 3 Join_2_Value_3 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; +0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 -0 0 3 3 Join_2_Value_3 Join_2_Value_3 SELECT '--'; -- SELECT t1.value, t2.value -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; + Join_2_Value_3 Join_1_Value_0 Join_2_Value_0 Join_1_Value_1 Join_2_Value_1 - Join_2_Value_3 -SELECT id FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError AMBIGUOUS_IDENTIFIER } -SELECT value FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT id FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT value FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0'; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0' ORDER BY ALL; 0 1 Join_2_Value_1 0 3 Join_2_Value_3 +0 Join_1_Value_0 0 Join_2_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0' ORDER BY ALL; 0 1 Join_2_Value_1 0 3 Join_2_Value_3 +0 Join_1_Value_0 0 Join_2_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0' ORDER BY ALL; 0 1 Join_2_Value_1 0 3 Join_2_Value_3 +0 Join_1_Value_0 0 Join_2_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id; +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id ORDER BY ALL; +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 -0 3 Join_2_Value_3 SELECT 'JOIN expression aliases'; JOIN expression aliases -SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id); +SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) ORDER BY ALL; +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 -0 3 Join_2_Value_3 SELECT '--'; -- -SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id; +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id ORDER BY ALL; +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 -0 3 Join_2_Value_3 SELECT 'JOIN FULL'; JOIN FULL SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value -FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; +FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 2 Join_1_Value_2 0 -0 3 Join_2_Value_3 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 2 Join_1_Value_2 0 -0 3 Join_2_Value_3 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; +0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 2 2 Join_1_Value_2 Join_1_Value_2 0 0 -0 0 3 3 Join_2_Value_3 Join_2_Value_3 SELECT '--'; -- SELECT t1.value, t2.value -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; + Join_2_Value_3 Join_1_Value_0 Join_2_Value_0 Join_1_Value_1 Join_2_Value_1 Join_1_Value_2 - Join_2_Value_3 -SELECT id FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError AMBIGUOUS_IDENTIFIER } -SELECT value FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT id FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT value FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0'; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0' ORDER BY ALL; +0 1 Join_2_Value_1 +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 0 2 Join_1_Value_2 0 -0 1 Join_2_Value_1 -0 3 Join_2_Value_3 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0' ORDER BY ALL; +0 1 Join_2_Value_1 +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 0 2 Join_1_Value_2 0 -0 1 Join_2_Value_1 -0 3 Join_2_Value_3 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0' ORDER BY ALL; +0 1 Join_2_Value_1 +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 0 2 Join_1_Value_2 0 -0 1 Join_2_Value_1 -0 3 Join_2_Value_3 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id; +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id ORDER BY ALL; +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 2 Join_1_Value_2 0 -0 3 Join_2_Value_3 SELECT 'JOIN expression aliases'; JOIN expression aliases -SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id); +SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) ORDER BY ALL; +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 2 Join_1_Value_2 0 -0 3 Join_2_Value_3 SELECT '--'; -- -SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id; +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id ORDER BY ALL; +0 3 Join_2_Value_3 0 Join_1_Value_0 0 Join_2_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 2 Join_1_Value_2 0 -0 3 Join_2_Value_3 SELECT 'First JOIN INNER second JOIN INNER'; First JOIN INNER second JOIN INNER SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT '--'; @@ -329,48 +329,48 @@ SELECT '--'; SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id -INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT 'First JOIN INNER second JOIN LEFT'; @@ -379,14 +379,14 @@ SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT '--'; @@ -394,48 +394,48 @@ SELECT '--'; SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id -LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT 'First JOIN INNER second JOIN RIGHT'; @@ -444,159 +444,159 @@ SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 -0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 - Join_3_Value_4 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 0 0 4 Join_3_Value_4 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 - Join_3_Value_4 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id -RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT 'First JOIN INNER second JOIN FULL'; First JOIN INNER second JOIN FULL SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 -0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 - Join_3_Value_4 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 0 0 4 Join_3_Value_4 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 - Join_3_Value_4 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id -FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT 'First JOIN LEFT second JOIN INNER'; First JOIN LEFT second JOIN INNER SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -604,7 +604,7 @@ SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -613,7 +613,7 @@ SELECT '--'; SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 @@ -621,7 +621,7 @@ SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 @@ -629,20 +629,20 @@ SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 @@ -650,7 +650,7 @@ SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -658,7 +658,7 @@ SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id -INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -668,7 +668,7 @@ SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -676,7 +676,7 @@ SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -685,7 +685,7 @@ SELECT '--'; SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 @@ -693,7 +693,7 @@ SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 @@ -701,7 +701,7 @@ SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 0 0 2 Join_1_Value_2 0 0 @@ -709,7 +709,7 @@ SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 0 0 2 Join_1_Value_2 0 0 @@ -718,7 +718,7 @@ JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 @@ -726,7 +726,7 @@ SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -734,7 +734,7 @@ SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id -LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -744,184 +744,184 @@ SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 -0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 - Join_3_Value_4 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 0 0 4 Join_3_Value_4 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 - Join_3_Value_4 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id -RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT 'First JOIN LEFT second JOIN FULL'; First JOIN LEFT second JOIN FULL SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 -0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 - Join_3_Value_4 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 0 0 2 Join_1_Value_2 0 0 -0 0 4 Join_3_Value_4 -0 0 1 Join_3_Value_1 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 0 0 2 Join_1_Value_2 0 0 -0 0 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 - Join_3_Value_4 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id -FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT 'First JOIN RIGHT second JOIN INNER'; First JOIN RIGHT second JOIN INNER SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT '--'; @@ -929,48 +929,48 @@ SELECT '--'; SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id -INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 SELECT 'First JOIN RIGHT second JOIN LEFT'; @@ -979,246 +979,246 @@ SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 3 Join_2_Value_3 0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 3 Join_2_Value_3 0 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 -0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; + Join_2_Value_3 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 - Join_2_Value_3 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 -0 3 Join_2_Value_3 0 +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 1 Join_2_Value_1 0 0 3 Join_2_Value_3 0 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; + Join_2_Value_3 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 - Join_2_Value_3 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 3 Join_2_Value_3 0 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id -LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 3 Join_2_Value_3 0 SELECT 'First JOIN RIGHT second JOIN RIGHT'; First JOIN RIGHT second JOIN RIGHT SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 -0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 - Join_3_Value_4 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 0 0 4 Join_3_Value_4 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 - Join_3_Value_4 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id -RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT 'First JOIN RIGHT second JOIN FULL'; First JOIN RIGHT second JOIN FULL SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 3 Join_2_Value_3 0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 3 Join_2_Value_3 0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 +0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 -0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 -0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; + Join_3_Value_4 + Join_2_Value_3 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 - Join_2_Value_3 - Join_3_Value_4 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 -0 3 Join_2_Value_3 0 -0 1 Join_2_Value_1 0 -0 0 4 Join_3_Value_4 +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 -0 1 Join_2_Value_1 0 -0 3 Join_2_Value_3 0 +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 0 0 4 Join_3_Value_4 +0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; + Join_3_Value_4 + Join_2_Value_3 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 - Join_2_Value_3 - Join_3_Value_4 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; +0 0 4 Join_3_Value_4 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 3 Join_2_Value_3 0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id -FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; +0 0 4 Join_3_Value_4 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 -0 3 Join_2_Value_3 0 -0 0 4 Join_3_Value_4 SELECT 'First JOIN FULL second JOIN INNER'; First JOIN FULL second JOIN INNER SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -1226,7 +1226,7 @@ SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -1235,7 +1235,7 @@ SELECT '--'; SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 @@ -1243,7 +1243,7 @@ SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 @@ -1251,20 +1251,20 @@ SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 @@ -1272,7 +1272,7 @@ SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -1280,7 +1280,7 @@ SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id -INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 @@ -1290,265 +1290,265 @@ SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 3 Join_2_Value_3 0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 3 Join_2_Value_3 0 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 -0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; + Join_2_Value_3 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 - Join_2_Value_3 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; +0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 0 0 2 Join_1_Value_2 0 0 -0 3 Join_2_Value_3 0 -0 1 Join_2_Value_1 0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; +0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 0 0 2 Join_1_Value_2 0 0 -0 1 Join_2_Value_1 0 -0 3 Join_2_Value_3 0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; + Join_2_Value_3 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 - Join_2_Value_3 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 3 Join_2_Value_3 0 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id -LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 3 Join_2_Value_3 0 SELECT 'First JOIN FULL second JOIN RIGHT'; First JOIN FULL second JOIN RIGHT SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 -0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 - Join_3_Value_4 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; -0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; 0 0 1 Join_3_Value_1 0 0 4 Join_3_Value_4 +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; + Join_3_Value_4 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 - Join_3_Value_4 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id -RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; +0 0 4 Join_3_Value_4 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 0 4 Join_3_Value_4 SELECT 'First JOIN FULL second JOIN FULL'; First JOIN FULL second JOIN FULL SELECT 'JOIN ON without conditions'; JOIN ON without conditions SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 3 Join_2_Value_3 0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 4 Join_3_Value_4 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 3 Join_2_Value_3 0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 +0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 -0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 -0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 SELECT '--'; -- SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; + Join_3_Value_4 + Join_2_Value_3 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 - Join_2_Value_3 - Join_3_Value_4 SELECT 'JOIN ON with conditions'; JOIN ON with conditions SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 0 0 2 Join_1_Value_2 0 0 -0 3 Join_2_Value_3 0 -0 1 Join_2_Value_1 0 -0 0 4 Join_3_Value_4 -0 0 1 Join_3_Value_1 SELECT '--'; -- SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 0 0 2 Join_1_Value_2 0 0 -0 1 Join_2_Value_1 0 -0 3 Join_2_Value_3 0 -0 0 1 Join_3_Value_1 -0 0 4 Join_3_Value_4 SELECT 'JOIN multiple clauses'; JOIN multiple clauses SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; + Join_3_Value_4 + Join_2_Value_3 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 Join_1_Value_2 Join_3_Value_0 - Join_2_Value_3 - Join_3_Value_4 SELECT 'JOIN expression aliases'; JOIN expression aliases SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; +0 0 4 Join_3_Value_4 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 3 Join_2_Value_3 0 -0 0 4 Join_3_Value_4 SELECT '--'; -- SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id -FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; +0 0 4 Join_3_Value_4 +0 3 Join_2_Value_3 0 0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 2 Join_1_Value_2 0 0 Join_3_Value_0 -0 3 Join_2_Value_3 0 -0 0 4 Join_3_Value_4 diff --git a/tests/queries/0_stateless/02372_analyzer_join.sql.j2 b/tests/queries/0_stateless/02372_analyzer_join.sql.j2 index facf4dc018b..45ae63b9a49 100644 --- a/tests/queries/0_stateless/02372_analyzer_join.sql.j2 +++ b/tests/queries/0_stateless/02372_analyzer_join.sql.j2 @@ -45,59 +45,59 @@ SELECT 'JOIN {{ join_type }}'; SELECT 'JOIN ON without conditions'; SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value -FROM test_table_join_1 {{ join_type }} JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; +FROM test_table_join_1 {{ join_type }} JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; SELECT '--'; SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; SELECT '--'; SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value -FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; SELECT '--'; SELECT t1.value, t2.value -FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; -SELECT id FROM test_table_join_1 {{ join_type }} JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT id FROM test_table_join_1 {{ join_type }} JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; -- { serverError AMBIGUOUS_IDENTIFIER } -SELECT value FROM test_table_join_1 {{ join_type }} JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT value FROM test_table_join_1 {{ join_type }} JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id ORDER BY ALL; -- { serverError AMBIGUOUS_IDENTIFIER } SELECT 'JOIN ON with conditions'; SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0'; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' ORDER BY ALL; SELECT '--'; SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0' ORDER BY ALL; SELECT '--'; SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0' ORDER BY ALL; SELECT '--'; SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0' ORDER BY ALL; SELECT 'JOIN multiple clauses'; SELECT t1.id, t1.value, t2.id, t2.value -FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id; +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id ORDER BY ALL; SELECT 'JOIN expression aliases'; -SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id); +SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) ORDER BY ALL; SELECT '--'; -SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1_id = t2_id; +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1_id = t2_id ORDER BY ALL; {% endfor %} @@ -110,56 +110,56 @@ SELECT 'JOIN ON without conditions'; SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value FROM test_table_join_1 {{ first_join_type }} JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id -{{ second_join_type }} JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +{{ second_join_type }} JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; SELECT '--'; SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -{{ second_join_type }} JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; SELECT '--'; SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, t3.id, test_table_join_3.id, t3.value, test_table_join_3.value FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -{{ second_join_type }} JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; SELECT '--'; SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id -{{ second_join_type }} JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id ORDER BY ALL; SELECT 'JOIN ON with conditions'; SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' -{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' ORDER BY ALL; SELECT '--'; SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' -{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0' ORDER BY ALL; SELECT 'JOIN multiple clauses'; SELECT t1.value, t2.value, t3.value FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id -{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id ORDER BY ALL; SELECT 'JOIN expression aliases'; SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) -{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id) ORDER BY ALL; SELECT '--'; SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON t1_id = t2_id -{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2_id = t3_id ORDER BY ALL; {% endfor %} {% endfor %} diff --git a/tests/queries/0_stateless/02372_data_race_in_avro.sh b/tests/queries/0_stateless/02372_data_race_in_avro.sh index 49c34e31923..50a7ae1e3c5 100755 --- a/tests/queries/0_stateless/02372_data_race_in_avro.sh +++ b/tests/queries/0_stateless/02372_data_race_in_avro.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02373_analyzer_join_use_nulls.reference b/tests/queries/0_stateless/02373_analyzer_join_use_nulls.reference index 3722c23e4a0..2b3671e1ea6 100644 --- a/tests/queries/0_stateless/02373_analyzer_join_use_nulls.reference +++ b/tests/queries/0_stateless/02373_analyzer_join_use_nulls.reference @@ -1,27 +1,27 @@ -- { echoOn } SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String SELECT '--'; -- SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; 0 UInt64 Join_1_Value_0 String 0 Nullable(UInt64) Join_2_Value_0 Nullable(String) 1 UInt64 Join_1_Value_1 String 1 Nullable(UInt64) Join_2_Value_1 Nullable(String) 2 UInt64 Join_1_Value_2 String \N Nullable(UInt64) \N Nullable(String) SELECT '--'; -- SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; 0 Nullable(UInt64) Join_1_Value_0 Nullable(String) 0 UInt64 Join_2_Value_0 String 1 Nullable(UInt64) Join_1_Value_1 Nullable(String) 1 UInt64 Join_2_Value_1 String \N Nullable(UInt64) \N Nullable(String) 3 UInt64 Join_2_Value_3 String SELECT '--'; -- SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; 0 Nullable(UInt64) Join_1_Value_0 Nullable(String) 0 Nullable(UInt64) Join_2_Value_0 Nullable(String) 1 Nullable(UInt64) Join_1_Value_1 Nullable(String) 1 Nullable(UInt64) Join_2_Value_1 Nullable(String) 2 Nullable(UInt64) Join_1_Value_2 Nullable(String) \N Nullable(UInt64) \N Nullable(String) @@ -30,14 +30,14 @@ SELECT '--'; -- SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id); +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) ORDER BY ALL; 0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String SELECT '--'; -- SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id); +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) ORDER BY ALL; 0 UInt64 0 UInt64 Join_1_Value_0 String 0 Nullable(UInt64) Join_2_Value_0 Nullable(String) 1 UInt64 1 UInt64 Join_1_Value_1 String 1 Nullable(UInt64) Join_2_Value_1 Nullable(String) 2 UInt64 2 UInt64 Join_1_Value_2 String \N Nullable(UInt64) \N Nullable(String) @@ -45,7 +45,7 @@ SELECT '--'; -- SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id); +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) ORDER BY ALL; 0 UInt64 0 Nullable(UInt64) Join_1_Value_0 Nullable(String) 0 UInt64 Join_2_Value_0 String 1 UInt64 1 Nullable(UInt64) Join_1_Value_1 Nullable(String) 1 UInt64 Join_2_Value_1 String 3 UInt64 \N Nullable(UInt64) \N Nullable(String) 3 UInt64 Join_2_Value_3 String @@ -53,7 +53,7 @@ SELECT '--'; -- SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id); +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) ORDER BY ALL; 0 Nullable(UInt64) 0 Nullable(UInt64) Join_1_Value_0 Nullable(String) 0 Nullable(UInt64) Join_2_Value_0 Nullable(String) 1 Nullable(UInt64) 1 Nullable(UInt64) Join_1_Value_1 Nullable(String) 1 Nullable(UInt64) Join_2_Value_1 Nullable(String) 2 Nullable(UInt64) 2 Nullable(UInt64) Join_1_Value_2 Nullable(String) \N Nullable(UInt64) \N Nullable(String) diff --git a/tests/queries/0_stateless/02373_analyzer_join_use_nulls.sql b/tests/queries/0_stateless/02373_analyzer_join_use_nulls.sql index db7895084e8..bcec6d178a8 100644 --- a/tests/queries/0_stateless/02373_analyzer_join_use_nulls.sql +++ b/tests/queries/0_stateless/02373_analyzer_join_use_nulls.sql @@ -26,46 +26,46 @@ INSERT INTO test_table_join_2 VALUES (3, 'Join_2_Value_3'); -- { echoOn } SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; SELECT '--'; SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; SELECT '--'; SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; SELECT '--'; SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY ALL; SELECT '--'; SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id); +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) ORDER BY ALL; SELECT '--'; SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id); +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) ORDER BY ALL; SELECT '--'; SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id); +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) ORDER BY ALL; SELECT '--'; SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) -FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id); +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) ORDER BY ALL; -- { echoOff } diff --git a/tests/queries/0_stateless/02387_parse_date_as_datetime.sql b/tests/queries/0_stateless/02387_parse_date_as_datetime.sql index 9727f677be2..24d367e56b6 100644 --- a/tests/queries/0_stateless/02387_parse_date_as_datetime.sql +++ b/tests/queries/0_stateless/02387_parse_date_as_datetime.sql @@ -1,13 +1,19 @@ CREATE TEMPORARY TABLE test (`i` Int64, `d` DateTime); + INSERT INTO test FORMAT JSONEachRow {"i": 123, "d": "2022-05-03"}; + INSERT INTO test FORMAT JSONEachRow {"i": 456, "d": "2022-05-03 01:02:03"}; + SELECT * FROM test ORDER BY i; DROP TABLE test; CREATE TEMPORARY TABLE test (`i` Int64, `d` DateTime64); + INSERT INTO test FORMAT JSONEachRow {"i": 123, "d": "2022-05-03"}; + INSERT INTO test FORMAT JSONEachRow {"i": 456, "d": "2022-05-03 01:02:03"}; + SELECT * FROM test ORDER BY i; DROP TABLE test; diff --git a/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh b/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh index 776d1f850b0..a0312b7a40c 100755 --- a/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh +++ b/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh @@ -25,7 +25,7 @@ function insert_data $CLICKHOUSE_CURL -sS -d 'begin transaction' "$CLICKHOUSE_URL&$TXN_SETTINGS" SETTINGS="$SETTINGS&session_check=1" BEGIN="begin transaction;" - COMMIT=$(echo -ne "\n\ncommit") + COMMIT=$(echo -ne "\n\n\ncommit") fi # max_block_size=10000, so external table will contain smaller blocks that will be squashed on insert-select (more chances to catch a bug on query cancellation) diff --git a/tests/queries/0_stateless/02444_async_broken_outdated_part_loading.sh b/tests/queries/0_stateless/02444_async_broken_outdated_part_loading.sh index d24c6afcef3..a1313ba16e5 100755 --- a/tests/queries/0_stateless/02444_async_broken_outdated_part_loading.sh +++ b/tests/queries/0_stateless/02444_async_broken_outdated_part_loading.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh $CLICKHOUSE_CLIENT -q "drop table if exists rmt sync;" -$CLICKHOUSE_CLIENT -q "create table rmt (n int) engine=ReplicatedMergeTree('/test/02444/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', '1') order by n" +$CLICKHOUSE_CLIENT -q "create table rmt (n int) engine=ReplicatedMergeTree('/test/02444/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', '1') order by n settings old_parts_lifetime=600" $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into rmt values (1);" $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into rmt values (2);" diff --git a/tests/queries/0_stateless/02446_parent_zero_copy_locks.sql b/tests/queries/0_stateless/02446_parent_zero_copy_locks.sql index 86eda526c72..a44322e02cf 100644 --- a/tests/queries/0_stateless/02446_parent_zero_copy_locks.sql +++ b/tests/queries/0_stateless/02446_parent_zero_copy_locks.sql @@ -7,7 +7,7 @@ create table rmt2 (n int, m int, k int) engine=ReplicatedMergeTree('/test/02446/ settings storage_policy='s3_cache', allow_remote_fs_zero_copy_replication=1, old_parts_lifetime=0, cleanup_delay_period=0, max_cleanup_delay_period=1, cleanup_delay_period_random_add=1, min_bytes_for_wide_part=0; -- FIXME zero-copy locks may remain in ZooKeeper forever if we failed to insert a part. --- Probably that's why we have to replace repsistent lock with ephemeral sometimes. +-- Probably that's why we have to replace persistent lock with ephemeral sometimes. -- See also "Replacing persistent lock with ephemeral for path {}. It can happen only in case of local part loss" -- in StorageReplicatedMergeTree::createZeroCopyLockNode set insert_keeper_fault_injection_probability=0; @@ -23,6 +23,10 @@ select sleepEachRow(0.5) as test_does_not_rely_on_this; insert into rmt1 values(5, 5, 5); alter table rmt2 update m = m * 10 where 1 settings mutations_sync=2; +-- wait for parts to be merged +select throwIf(name = 'all_0_5_1_6') from system.parts where database=currentDatabase() and table like 'rmt%' and active +format Null; -- { retry 30 until serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + system sync replica rmt2; set optimize_throw_if_noop=1; optimize table rmt2 final; @@ -32,10 +36,10 @@ select 1, * from rmt1 order by n; system sync replica rmt1; select 2, * from rmt2 order by n; --- a funny way to wait for outdated parts to be removed -select sleep(1), sleepEachRow(0.1) from url('http://localhost:8123/?param_tries={1..10}&query=' || encodeURLComponent( - 'select *, _state from system.parts where database=''' || currentDatabase() || ''' and table like ''rmt%'' and active=0' - ), 'LineAsString', 's String') settings max_threads=1 format Null; +-- wait for outdated parts to be removed (do not ignore _state column, so it will count Deleting parts as well) +select throwIf(count() = 0), groupArray(_state) from ( +select *, _state from system.parts where database=currentDatabase() and table like 'rmt%' and active=0 +) format Null; -- { retry 30 until serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } select *, _state from system.parts where database=currentDatabase() and table like 'rmt%' and active=0; diff --git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh index 93a5fcee8e2..c6bf298f944 100755 --- a/tests/queries/0_stateless/02447_drop_database_replica.sh +++ b/tests/queries/0_stateless/02447_drop_database_replica.sh @@ -1,5 +1,9 @@ #!/usr/bin/env bash +# Tags: no-parallel +# no-parallel: This test is not parallel because when we execute system-wide SYSTEM DROP REPLICA, +# other tests might shut down the storage in parallel and the test will fail. + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql b/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql index 71a2381d7b6..845f895b7b4 100644 --- a/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql +++ b/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql @@ -7,21 +7,22 @@ create table test (x Nullable(UInt32), y UInt32) engine=Memory(); set use_structure_from_insertion_table_in_table_functions=2; set input_format_json_infer_incomplete_types_as_strings=0; + insert into test select * from file(02458_data.jsonl); insert into test select x, 1 from file(02458_data.jsonl); insert into test select x, y from file(02458_data.jsonl); insert into test select x + 1, y from file(02458_data.jsonl); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} insert into test select x, z from file(02458_data.jsonl); - insert into test select * from file(02458_data.jsoncompacteachrow); insert into test select x, 1 from file(02458_data.jsoncompacteachrow); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} insert into test select x, y from file(02458_data.jsoncompacteachrow); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} insert into test select x + 1, y from file(02458_data.jsoncompacteachrow); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} insert into test select x, z from file(02458_data.jsoncompacteachrow); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} - insert into test select * from input() format CSV 1,2 + insert into test select x, y from input() format CSV 1,2 -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} -insert into test select x, y from input() format JSONEachRow {"x" : null, "y" : 42} + +insert into test select x, y from input() format JSONEachRow {"x" : null, "y" : 42}; select * from test order by y; @@ -31,9 +32,9 @@ insert into test select * from file(02458_data.jsonl); insert into test select x from file(02458_data.jsonl); insert into test select y from file(02458_data.jsonl); insert into test select y as x from file(02458_data.jsonl); - insert into test select c1 from input() format CSV 1,2; -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} -insert into test select x from input() format JSONEachRow {"x" : null, "y" : 42} + +insert into test select x from input() format JSONEachRow {"x" : null, "y" : 42}; select * from test order by x; diff --git a/tests/queries/0_stateless/02477_age_datetime64.reference b/tests/queries/0_stateless/02477_age_datetime64.reference index 3b4459dd26d..fb085f461c9 100644 --- a/tests/queries/0_stateless/02477_age_datetime64.reference +++ b/tests/queries/0_stateless/02477_age_datetime64.reference @@ -111,3 +111,8 @@ SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), ma 1 SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC'))); 1 +-- UBsan bug #66638 +set session_timezone = 'UTC'; +SELECT age('second', toDateTime(1157339245694594829, 6, 'UTC'), toDate('2015-08-18')) + +-8973935999 diff --git a/tests/queries/0_stateless/02477_age_datetime64.sql b/tests/queries/0_stateless/02477_age_datetime64.sql index 1bed93991ca..b5fa4da8837 100644 --- a/tests/queries/0_stateless/02477_age_datetime64.sql +++ b/tests/queries/0_stateless/02477_age_datetime64.sql @@ -75,3 +75,7 @@ SELECT age('second', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), SELECT age('second', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDateTime64('2015-08-18 00:00:10', 3, 'UTC'))); SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), materialize(toDate('2015-08-19', 'UTC'))); SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC'))); + +-- UBsan bug #66638 +set session_timezone = 'UTC'; +SELECT age('second', toDateTime(1157339245694594829, 6, 'UTC'), toDate('2015-08-18')) diff --git a/tests/queries/0_stateless/02481_async_insert_race_long.sh b/tests/queries/0_stateless/02481_async_insert_race_long.sh index d8153967e9a..b0088017d32 100755 --- a/tests/queries/0_stateless/02481_async_insert_race_long.sh +++ b/tests/queries/0_stateless/02481_async_insert_race_long.sh @@ -13,7 +13,7 @@ function insert1() { local TIMELIMIT=$((SECONDS+$1)) while [ $SECONDS -lt "$TIMELIMIT" ]; do - ${MY_CLICKHOUSE_CLIENT} --wait_for_async_insert 0 -q 'INSERT INTO async_inserts_race FORMAT CSV 1,"a"' + ${MY_CLICKHOUSE_CLIENT} --insert_keeper_fault_injection_probability=0 --wait_for_async_insert 0 -q 'INSERT INTO async_inserts_race FORMAT CSV 1,"a"' done } @@ -21,7 +21,7 @@ function insert2() { local TIMELIMIT=$((SECONDS+$1)) while [ $SECONDS -lt "$TIMELIMIT" ]; do - ${MY_CLICKHOUSE_CLIENT} --wait_for_async_insert 0 -q 'INSERT INTO async_inserts_race FORMAT JSONEachRow {"id": 5, "s": "e"} {"id": 6, "s": "f"}' + ${MY_CLICKHOUSE_CLIENT} --insert_keeper_fault_injection_probability=0 --wait_for_async_insert 0 -q 'INSERT INTO async_inserts_race FORMAT JSONEachRow {"id": 5, "s": "e"} {"id": 6, "s": "f"}' done } @@ -29,7 +29,7 @@ function insert3() { local TIMELIMIT=$((SECONDS+$1)) while [ $SECONDS -lt "$TIMELIMIT" ]; do - ${MY_CLICKHOUSE_CLIENT} --wait_for_async_insert 1 -q "INSERT INTO async_inserts_race VALUES (7, 'g') (8, 'h')" & + ${MY_CLICKHOUSE_CLIENT} --insert_keeper_fault_injection_probability=0 --wait_for_async_insert 1 -q "INSERT INTO async_inserts_race VALUES (7, 'g') (8, 'h')" & sleep 0.05 done diff --git a/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh index 55e6ac2f758..6fd6da69b70 100755 --- a/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh +++ b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash -# Tags: no-ubsan, no-fasttest, no-tsan +# Tags: no-ubsan, no-fasttest, no-tsan, no-msan, no-asan # It is too slow under TSan +# It eats too much memory under ASan or MSan CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh b/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh index 3c046020773..f747b3156a5 100755 --- a/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh +++ b/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh @@ -5,4 +5,11 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh $CLICKHOUSE_BENCHMARK -q 'select throwIf(1)' |& grep '^An error occurred while processing the query.*Exception:' -c -$CLICKHOUSE_BENCHMARK --max-consecutive-errors 10 -q 'select throwIf(1)' |& grep '^An error occurred while processing the query.*Exception:' -c +RES=$($CLICKHOUSE_BENCHMARK --max-consecutive-errors 10 -q 'select throwIf(1)' |& tee "${CLICKHOUSE_TMP}/${CLICKHOUSE_DATABASE}.log" | grep '^An error occurred while processing the query.*Exception:' -c) + +if [ "$RES" -eq 10 ] +then + echo "$RES" +else + cat "${CLICKHOUSE_TMP}/${CLICKHOUSE_DATABASE}.log" +fi diff --git a/tests/queries/0_stateless/02494_query_cache_asynchronous_metrics.reference b/tests/queries/0_stateless/02494_query_cache_asynchronous_metrics.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_asynchronous_metrics.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/02494_query_cache_asynchronous_metrics.sql b/tests/queries/0_stateless/02494_query_cache_asynchronous_metrics.sql new file mode 100644 index 00000000000..d8de4facb38 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_asynchronous_metrics.sql @@ -0,0 +1,13 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SYSTEM DROP QUERY CACHE; + +-- Create an entry in the query cache +SELECT 1 SETTINGS use_query_cache = true; + +-- Asynchronous metrics must know about the entry +SYSTEM RELOAD ASYNCHRONOUS METRICS; +SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'; + +SYSTEM DROP QUERY CACHE; diff --git a/tests/queries/0_stateless/02495_analyzer_storage_join.sql b/tests/queries/0_stateless/02495_analyzer_storage_join.sql index aeab15862c3..7e6c03971f9 100644 --- a/tests/queries/0_stateless/02495_analyzer_storage_join.sql +++ b/tests/queries/0_stateless/02495_analyzer_storage_join.sql @@ -23,7 +23,7 @@ SELECT x FROM t1 ALL RIGHT JOIN tj ON t1.id1 == tj.key1 AND t1.id2 == tj.key2 OR SELECT '--- name clashes ---'; CREATE TABLE t (key2 UInt64, key1 Int64, b UInt64, x UInt64, val UInt64) ENGINE = Memory; -INSERT INTO t VALUES (1, -1, 11, 111, 1111), (2, -2, 22, 222, 2222), (3, -3, 33, 333, 2222), (4, -4, 44, 444, 4444), (5, -5, 55, 555, 5555) +INSERT INTO t VALUES (1, -1, 11, 111, 1111), (2, -2, 22, 222, 2222), (3, -3, 33, 333, 2222), (4, -4, 44, 444, 4444), (5, -5, 55, 555, 5555); SELECT '-- using --'; diff --git a/tests/queries/0_stateless/02504_regexp_dictionary_table_source.sql b/tests/queries/0_stateless/02504_regexp_dictionary_table_source.sql index 487b6e7f58e..604c5b179cf 100644 --- a/tests/queries/0_stateless/02504_regexp_dictionary_table_source.sql +++ b/tests/queries/0_stateless/02504_regexp_dictionary_table_source.sql @@ -14,12 +14,12 @@ CREATE TABLE regexp_dictionary_source_table -- test back reference. -INSERT INTO regexp_dictionary_source_table VALUES (1, 0, 'Linux/(\d+[\.\d]*).+tlinux', ['name', 'version'], ['TencentOS', '\1']) -INSERT INTO regexp_dictionary_source_table VALUES (2, 0, '(\d+)/tclwebkit(\d+[\.\d]*)', ['name', 'version', 'comment'], ['Android', '$1', 'test $1 and $2']) -INSERT INTO regexp_dictionary_source_table VALUES (3, 2, '33/tclwebkit', ['version'], ['13']) -INSERT INTO regexp_dictionary_source_table VALUES (4, 2, '3[12]/tclwebkit', ['version'], ['12']) -INSERT INTO regexp_dictionary_source_table VALUES (5, 2, '3[12]/tclwebkit', ['version'], ['11']) -INSERT INTO regexp_dictionary_source_table VALUES (6, 2, '3[12]/tclwebkit', ['version'], ['10']) +INSERT INTO regexp_dictionary_source_table VALUES (1, 0, 'Linux/(\d+[\.\d]*).+tlinux', ['name', 'version'], ['TencentOS', '\1']); +INSERT INTO regexp_dictionary_source_table VALUES (2, 0, '(\d+)/tclwebkit(\d+[\.\d]*)', ['name', 'version', 'comment'], ['Android', '$1', 'test $1 and $2']); +INSERT INTO regexp_dictionary_source_table VALUES (3, 2, '33/tclwebkit', ['version'], ['13']); +INSERT INTO regexp_dictionary_source_table VALUES (4, 2, '3[12]/tclwebkit', ['version'], ['12']); +INSERT INTO regexp_dictionary_source_table VALUES (5, 2, '3[12]/tclwebkit', ['version'], ['11']); +INSERT INTO regexp_dictionary_source_table VALUES (6, 2, '3[12]/tclwebkit', ['version'], ['10']); create dictionary regexp_dict1 ( @@ -50,23 +50,24 @@ CREATE TABLE needle_table ENGINE=TinyLog; INSERT INTO needle_table select concat(toString(number + 30), '/tclwebkit', toString(number)) from system.numbers limit 15; + select * from needle_table; select dictGet(regexp_dict1, ('name', 'version'), key) from needle_table; -- test invalid -INSERT INTO regexp_dictionary_source_table VALUES (6, 2, '3[12]/tclwebkit', ['version'], ['10']) +INSERT INTO regexp_dictionary_source_table VALUES (6, 2, '3[12]/tclwebkit', ['version'], ['10']); SYSTEM RELOAD dictionary regexp_dict1; -- { serverError INCORRECT_DICTIONARY_DEFINITION } truncate table regexp_dictionary_source_table; -INSERT INTO regexp_dictionary_source_table VALUES (6, 2, '3[12]/tclwebkit', ['version'], ['10']) +INSERT INTO regexp_dictionary_source_table VALUES (6, 2, '3[12]/tclwebkit', ['version'], ['10']); SYSTEM RELOAD dictionary regexp_dict1; -- { serverError INCORRECT_DICTIONARY_DEFINITION } truncate table regexp_dictionary_source_table; -INSERT INTO regexp_dictionary_source_table VALUES (1, 2, 'Linux/(\d+[\.\d]*).+tlinux', ['name', 'version'], ['TencentOS', '\1']) -INSERT INTO regexp_dictionary_source_table VALUES (2, 3, '(\d+)/tclwebkit(\d+[\.\d]*)', ['name', 'version', 'comment'], ['Android', '$1', 'test $1 and $2']) -INSERT INTO regexp_dictionary_source_table VALUES (3, 1, '(\d+)/tclwebkit(\d+[\.\d]*)', ['name', 'version', 'comment'], ['Android', '$1', 'test $1 and $2']) +INSERT INTO regexp_dictionary_source_table VALUES (1, 2, 'Linux/(\d+[\.\d]*).+tlinux', ['name', 'version'], ['TencentOS', '\1']); +INSERT INTO regexp_dictionary_source_table VALUES (2, 3, '(\d+)/tclwebkit(\d+[\.\d]*)', ['name', 'version', 'comment'], ['Android', '$1', 'test $1 and $2']); +INSERT INTO regexp_dictionary_source_table VALUES (3, 1, '(\d+)/tclwebkit(\d+[\.\d]*)', ['name', 'version', 'comment'], ['Android', '$1', 'test $1 and $2']); SYSTEM RELOAD dictionary regexp_dict1; -- { serverError INCORRECT_DICTIONARY_DEFINITION } -- test priority diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.reference b/tests/queries/0_stateless/02532_send_logs_level_test.reference index 7e51b888d9c..72f4ea06184 100644 --- a/tests/queries/0_stateless/02532_send_logs_level_test.reference +++ b/tests/queries/0_stateless/02532_send_logs_level_test.reference @@ -1,3 +1,4 @@ + MergeTreeReadPoolBase: Will use min_marks_per_task=24 MergeTreeMarksLoader: Loading marks from path data.cmrk3 MergeTreeRangeReader: First reader returned: num_rows: 1, columns: 1, total_rows_per_granule: 1, no filter, column[0]: Int32(size = 1), requested columns: key MergeTreeRangeReader: read() returned num_rows: 1, columns: 1, total_rows_per_granule: 1, no filter, column[0]: Int32(size = 1), sample block key diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.sh b/tests/queries/0_stateless/02532_send_logs_level_test.sh index 71f42e2a6db..506ac2331f2 100755 --- a/tests/queries/0_stateless/02532_send_logs_level_test.sh +++ b/tests/queries/0_stateless/02532_send_logs_level_test.sh @@ -1,7 +1,8 @@ #!/usr/bin/env bash -# Tags: no-object-storage, no-debug +# Tags: no-object-storage, no-debug, no-random-merge-tree-settings # - no-object-storage - S3 has additional logging # - no-debug - debug builds also has additional logging +# - no-random-merge-tree-settings - changes content of log messages CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql index 926bde3a74b..b88224a89c1 100644 --- a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql +++ b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql @@ -7,7 +7,7 @@ CREATE TABLE test_tab engine = MergeTree() ORDER BY id; -INSERT INTO test_tab VALUES (1, 'Hello World', 'l', 'xx') (2, 'Hello World', 'll', 'x') (3, 'Hello World', 'not_found', 'x') (4, 'Hello World', '[eo]', 'x') (5, 'Hello World', '.', 'x') +INSERT INTO test_tab VALUES (1, 'Hello World', 'l', 'xx') (2, 'Hello World', 'll', 'x') (3, 'Hello World', 'not_found', 'x') (4, 'Hello World', '[eo]', 'x') (5, 'Hello World', '.', 'x'); SELECT '** replaceAll() **'; @@ -77,7 +77,7 @@ CREATE TABLE test_tab engine = MergeTree() ORDER BY id; -INSERT INTO test_tab VALUES (1, 'Hello World', 'l', 'x') (2, 'Hello World', '', 'y') +INSERT INTO test_tab VALUES (1, 'Hello World', 'l', 'x') (2, 'Hello World', '', 'y'); -- needle: non-const, replacement: const SELECT replaceAll(haystack, needle, 'x') FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } diff --git a/tests/queries/0_stateless/02553_type_json_attach_partition.sql b/tests/queries/0_stateless/02553_type_json_attach_partition.sql index 9225106f767..e77f5885ec3 100644 --- a/tests/queries/0_stateless/02553_type_json_attach_partition.sql +++ b/tests/queries/0_stateless/02553_type_json_attach_partition.sql @@ -5,6 +5,7 @@ DROP TABLE IF EXISTS t_json_attach_partition; CREATE TABLE t_json_attach_partition(b UInt64, c JSON) ENGINE = MergeTree ORDER BY tuple(); INSERT INTO t_json_attach_partition FORMAT JSONEachRow {"b": 1, "c" : {"k1": 1}}; + ALTER TABLE t_json_attach_partition DETACH PARTITION tuple(); INSERT INTO t_json_attach_partition FORMAT JSONEachRow {"b": 1, "c" : {"k1": [1, 2]}}; diff --git a/tests/queries/0_stateless/02560_window_ntile.reference b/tests/queries/0_stateless/02560_window_ntile.reference index d877b2034cb..5f01832d075 100644 --- a/tests/queries/0_stateless/02560_window_ntile.reference +++ b/tests/queries/0_stateless/02560_window_ntile.reference @@ -213,6 +213,8 @@ select a, b, ntile('2') over (partition by a order by b) from(select intDiv(numb select a, b, ntile(0) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError BAD_ARGUMENTS } select a, b, ntile(-2) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError BAD_ARGUMENTS } select a, b, ntile(b + 1) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError BAD_ARGUMENTS } +select a, b, ntile() over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select a, b, ntile(3, 2) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -- Bad window type select a, b, ntile(2) over (partition by a) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError BAD_ARGUMENTS } select a, b, ntile(2) over (partition by a order by b rows between 4 preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02560_window_ntile.sql b/tests/queries/0_stateless/02560_window_ntile.sql index 44e9b865052..d0e4d557e58 100644 --- a/tests/queries/0_stateless/02560_window_ntile.sql +++ b/tests/queries/0_stateless/02560_window_ntile.sql @@ -16,6 +16,8 @@ select a, b, ntile('2') over (partition by a order by b) from(select intDiv(numb select a, b, ntile(0) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError BAD_ARGUMENTS } select a, b, ntile(-2) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError BAD_ARGUMENTS } select a, b, ntile(b + 1) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError BAD_ARGUMENTS } +select a, b, ntile() over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select a, b, ntile(3, 2) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -- Bad window type select a, b, ntile(2) over (partition by a) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02563_analyzer_merge.reference b/tests/queries/0_stateless/02563_analyzer_merge.reference index 8be01c88d6f..2b3cc2d5dfb 100644 --- a/tests/queries/0_stateless/02563_analyzer_merge.reference +++ b/tests/queries/0_stateless/02563_analyzer_merge.reference @@ -1,2 +1,3 @@ 0 Value_0 02563_db test_merge_table_1 1 Value_1 02563_db test_merge_table_2 +91138316-5127-45ac-9c25-4ad8779777b4 160 diff --git a/tests/queries/0_stateless/02563_analyzer_merge.sql b/tests/queries/0_stateless/02563_analyzer_merge.sql index c90f7dcb2a5..217fb7019c4 100644 --- a/tests/queries/0_stateless/02563_analyzer_merge.sql +++ b/tests/queries/0_stateless/02563_analyzer_merge.sql @@ -35,4 +35,49 @@ SELECT id, value, _database, _table FROM 02563_db.test_merge_table ORDER BY id; DROP TABLE 02563_db.test_merge_table; DROP TABLE 02563_db.test_merge_table_1; DROP TABLE 02563_db.test_merge_table_2; + +CREATE TABLE 02563_db.t_1 +( + timestamp DateTime64(9), + a String, + b String +) +ENGINE = MergeTree +PARTITION BY formatDateTime(toStartOfMinute(timestamp), '%Y%m%d%H', 'UTC') +ORDER BY (timestamp, a, b); + +CREATE TABLE 02563_db.dist_t_1 (timestamp DateTime64(9), a String, b String) ENGINE = Distributed('test_shard_localhost', '02563_db', 't_1'); + +CREATE TABLE 02563_db.m ENGINE = Merge('02563_db', '^dist_'); + +INSERT INTO 02563_db.t_1 (timestamp, a, b) +select + addMinutes(toDateTime64('2024-07-13 22:00:00', 9, 'UTC'), number), + randomString(5), + randomString(5) +from numbers(30); + +INSERT INTO 02563_db.t_1 (timestamp, a, b) +select + addMinutes(toDateTime64('2024-07-13 23:00:00', 9, 'UTC'), number), + randomString(5), + randomString(5) +from numbers(30); + +INSERT INTO 02563_db.t_1 (timestamp, a, b) +select + addMinutes(toDateTime64('2024-07-14 00:00:00', 9, 'UTC'), number), + randomString(5), + randomString(5) +from numbers(100); + + +SELECT '91138316-5127-45ac-9c25-4ad8779777b4', + count() +FROM 02563_db.m; + +DROP TABLE 02563_db.t_1; +DROP TABLE 02563_db.dist_t_1; +DROP TABLE 02563_db.m; + DROP DATABASE 02563_db; diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.reference b/tests/queries/0_stateless/02572_query_views_log_background_thread.reference index 22dfaf93781..d7f2272f5b4 100644 --- a/tests/queries/0_stateless/02572_query_views_log_background_thread.reference +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.reference @@ -1,20 +1,4 @@ --- { echoOn } -insert into buffer_02572 values (1); --- ensure that the flush was not direct -select * from data_02572; -select * from copy_02572; --- we cannot use OPTIMIZE, this will attach query context, so let's wait -SET function_sleep_max_microseconds_per_block = 6000000; -select sleepEachRow(1) from numbers(3*2) format Null; -select * from data_02572; +OK 1 -select * from copy_02572; 1 -system flush logs; -select count() > 0, lower(status::String), errorCodeToName(exception_code) - from system.query_views_log where - view_name = concatWithSeparator('.', currentDatabase(), 'mv_02572') and - view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572') - group by 2, 3 -; 1 queryfinish OK diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.sh b/tests/queries/0_stateless/02572_query_views_log_background_thread.sh new file mode 100755 index 00000000000..a3e428e75c8 --- /dev/null +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# INSERT buffer_02572 -> data_02572 -> copy_02572 +# ^^ +# push to system.query_views_log + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "drop table if exists buffer_02572; + drop table if exists data_02572; drop table if exists copy_02572; drop table if exists mv_02572;" + +${CLICKHOUSE_CLIENT} --query="create table copy_02572 (key Int) engine=Memory();" +${CLICKHOUSE_CLIENT} --query="create table data_02572 (key Int) engine=Memory();" +${CLICKHOUSE_CLIENT} --query="create table buffer_02572 (key Int) engine=Buffer(currentDatabase(), data_02572, 1, 3, 3, 1, 1e9, 1, 1e9);" +${CLICKHOUSE_CLIENT} --query="create materialized view mv_02572 to copy_02572 as select * from data_02572;" + +${CLICKHOUSE_CLIENT} --query="insert into buffer_02572 values (1);" + +# ensure that the flush was not direct +${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "select * from data_02572; select * from copy_02572;" + +# we cannot use OPTIMIZE, this will attach query context, so let's wait +for _ in {1..100}; do + $CLICKHOUSE_CLIENT -q "select * from data_02572;" | grep -q "1" && echo 'OK' && break + sleep 0.5 +done + + +${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "select * from data_02572; select * from copy_02572;" + +${CLICKHOUSE_CLIENT} --query="system flush logs;" +${CLICKHOUSE_CLIENT} --query="select count() > 0, lower(status::String), errorCodeToName(exception_code) + from system.query_views_log where + view_name = concatWithSeparator('.', currentDatabase(), 'mv_02572') and + view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572') + group by 2, 3;" \ No newline at end of file diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.sql b/tests/queries/0_stateless/02572_query_views_log_background_thread.sql deleted file mode 100644 index 939c189c5fe..00000000000 --- a/tests/queries/0_stateless/02572_query_views_log_background_thread.sql +++ /dev/null @@ -1,36 +0,0 @@ --- INSERT buffer_02572 -> data_02572 -> copy_02572 --- ^^ --- push to system.query_views_log - -drop table if exists buffer_02572; -drop table if exists data_02572; -drop table if exists copy_02572; -drop table if exists mv_02572; - -create table copy_02572 (key Int) engine=Memory(); -create table data_02572 (key Int) engine=Memory(); -create table buffer_02572 (key Int) engine=Buffer(currentDatabase(), data_02572, 1, - /* never direct flush for flush from background thread */ - /* min_time= */ 3, 3, - 1, 1e9, - 1, 1e9); -create materialized view mv_02572 to copy_02572 as select * from data_02572; - --- { echoOn } -insert into buffer_02572 values (1); --- ensure that the flush was not direct -select * from data_02572; -select * from copy_02572; --- we cannot use OPTIMIZE, this will attach query context, so let's wait -SET function_sleep_max_microseconds_per_block = 6000000; -select sleepEachRow(1) from numbers(3*2) format Null; -select * from data_02572; -select * from copy_02572; - -system flush logs; -select count() > 0, lower(status::String), errorCodeToName(exception_code) - from system.query_views_log where - view_name = concatWithSeparator('.', currentDatabase(), 'mv_02572') and - view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572') - group by 2, 3 -; diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.reference b/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.reference index 3a92fcf283d..d3e7e113ffe 100644 --- a/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.reference +++ b/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.reference @@ -3,7 +3,11 @@ all_1_1_0 all_2_2_0 all_3_3_0 all_4_4_0 +40000 5000 all_1_1_0_9 5000 all_2_2_0_9 5000 all_3_3_0_9 5000 all_4_4_0_9 +mutation_version has_parts_for_which_set_was_built has_parts_that_shared_set +8 1 1 +9 1 1 diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql b/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql index 741d0177971..6b0677a80ae 100644 --- a/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql +++ b/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql @@ -18,12 +18,35 @@ SELECT name FROM system.parts WHERE database=currentDatabase() AND table = '0258 -- Start multiple mutations simultaneously SYSTEM STOP MERGES 02581_trips; -ALTER TABLE 02581_trips UPDATE description='5' WHERE id IN (SELECT (number*10 + 5)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=0; -ALTER TABLE 02581_trips UPDATE description='6' WHERE id IN (SELECT (number*10 + 6)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=0; -ALTER TABLE 02581_trips DELETE WHERE id IN (SELECT (number*10 + 7)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=0; -ALTER TABLE 02581_trips UPDATE description='8' WHERE id IN (SELECT (number*10 + 8)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=0; +ALTER TABLE 02581_trips UPDATE description='5' WHERE id IN (SELECT (number*10 + 5)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=0; +ALTER TABLE 02581_trips UPDATE description='6' WHERE id IN (SELECT (number*10 + 6)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=0; +ALTER TABLE 02581_trips DELETE WHERE id IN (SELECT (number*10 + 7)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=0; +ALTER TABLE 02581_trips UPDATE description='8' WHERE id IN (SELECT (number*10 + 8)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=0; SYSTEM START MERGES 02581_trips; -DELETE FROM 02581_trips WHERE id IN (SELECT (number*10 + 9)::UInt32 FROM numbers(200000000)); -SELECT count(), _part from 02581_trips WHERE description = '' GROUP BY _part ORDER BY _part; + +-- Wait for mutations to finish +SELECT count() FROM 02581_trips SETTINGS select_sequential_consistency = 1; + +DELETE FROM 02581_trips WHERE id IN (SELECT (number*10 + 9)::UInt32 FROM numbers(10000000)) SETTINGS lightweight_deletes_sync = 2; +SELECT count(), _part from 02581_trips WHERE description = '' GROUP BY _part ORDER BY _part SETTINGS select_sequential_consistency=1; + +SYSTEM FLUSH LOGS; +-- Check that in every mutation there were parts that built sets (log messages like 'Created Set with 10000000 entries from 10000000 rows in 0.388989187 sec.' ) +-- and parts that shared sets (log messages like 'Got set from cache in 0.388930505 sec.' ) +WITH ( + SELECT uuid + FROM system.tables + WHERE (database = currentDatabase()) AND (name = '02581_trips') + ) AS table_uuid +SELECT + CAST(splitByChar('_', query_id)[5], 'UInt64') AS mutation_version, -- '5521485f-8a40-4aba-87a2-00342c369563::all_3_3_0_6' + sum(message LIKE 'Created Set with % entries%') >= 1 AS has_parts_for_which_set_was_built, + sum(message LIKE 'Got set from cache%') >= 1 AS has_parts_that_shared_set +FROM system.text_log +WHERE + query_id LIKE concat(CAST(table_uuid, 'String'), '::all\\_%') + AND (event_date >= yesterday()) + AND (message LIKE 'Created Set with % entries%' OR message LIKE 'Got set from cache%') +GROUP BY mutation_version ORDER BY mutation_version FORMAT TSVWithNames; DROP TABLE 02581_trips; diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.reference b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.reference index 3a7410d925f..eecd768eb5e 100644 --- a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.reference +++ b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.reference @@ -10,3 +10,11 @@ all_4_4_0 20000 16000 12000 +mutation_version has_parts_for_which_set_was_built has_parts_that_shared_set +5 1 1 +6 1 1 +7 1 1 +8 1 1 +9 1 1 +10 1 1 +11 1 1 diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql index b7314c8fa47..091a9c8171d 100644 --- a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql +++ b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql @@ -18,42 +18,63 @@ SELECT count() from 02581_trips WHERE description = ''; SELECT name FROM system.parts WHERE database=currentDatabase() AND table = '02581_trips' AND active ORDER BY name; -- Run mutation with `id` a 'IN big subquery' -ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=2; +ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; SELECT count() from 02581_trips WHERE description = ''; -ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10 + 1)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=2, max_rows_in_set=1000; +ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10 + 1)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2, max_rows_in_set=1000; SELECT count() from 02581_trips WHERE description = ''; -- Run mutation with func(`id`) IN big subquery -ALTER TABLE 02581_trips UPDATE description='b' WHERE id::UInt64 IN (SELECT (number*10 + 2)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=2; +ALTER TABLE 02581_trips UPDATE description='b' WHERE id::UInt64 IN (SELECT (number*10 + 2)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; SELECT count() from 02581_trips WHERE description = ''; -- Run mutation with non-PK `id2` IN big subquery -ALTER TABLE 02581_trips UPDATE description='c' WHERE id2 IN (SELECT (number*10 + 3)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=2; +--SELECT count(), _part FROM 02581_trips WHERE id2 IN (SELECT (number*10 + 3)::UInt32 FROM numbers(10000000)) GROUP BY _part ORDER BY _part; +--EXPLAIN SELECT (), _part FROM 02581_trips WHERE id2 IN (SELECT (number*10 + 3)::UInt32 FROM numbers(10000000)); +ALTER TABLE 02581_trips UPDATE description='c' WHERE id2 IN (SELECT (number*10 + 3)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; SELECT count() from 02581_trips WHERE description = ''; -- Run mutation with PK and non-PK IN big subquery ALTER TABLE 02581_trips UPDATE description='c' WHERE - (id IN (SELECT (number*10 + 4)::UInt32 FROM numbers(200000000))) OR - (id2 IN (SELECT (number*10 + 4)::UInt32 FROM numbers(200000000))) + (id IN (SELECT (number*10 + 4)::UInt32 FROM numbers(10000000))) OR + (id2 IN (SELECT (number*10 + 4)::UInt32 FROM numbers(10000000))) SETTINGS mutations_sync=2; SELECT count() from 02581_trips WHERE description = ''; -- Run mutation with PK and non-PK IN big subquery ALTER TABLE 02581_trips UPDATE description='c' WHERE - (id::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(200000000))) OR - (id2::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(200000000))) + (id::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(10000000))) OR + (id2::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(10000000))) SETTINGS mutations_sync=2; SELECT count() from 02581_trips WHERE description = ''; -- Run mutation with PK and non-PK IN big subquery ALTER TABLE 02581_trips UPDATE description='c' WHERE - (id::UInt32 IN (SELECT (number*10 + 6)::UInt32 FROM numbers(200000000))) OR - ((id2+1)::String IN (SELECT (number*10 + 6)::UInt32 FROM numbers(200000000))) + (id::UInt32 IN (SELECT (number*10 + 6)::UInt32 FROM numbers(10000000))) OR + ((id2+1)::String IN (SELECT (number*10 + 6)::UInt32 FROM numbers(10000000))) SETTINGS mutations_sync=2; SELECT count() from 02581_trips WHERE description = ''; +SYSTEM FLUSH LOGS; +-- Check that in every mutation there were parts that built sets (log messages like 'Created Set with 10000000 entries from 10000000 rows in 0.388989187 sec.' ) +-- and parts that shared sets (log messages like 'Got set from cache in 0.388930505 sec.' ) +WITH ( + SELECT uuid + FROM system.tables + WHERE (database = currentDatabase()) AND (name = '02581_trips') + ) AS table_uuid +SELECT + CAST(splitByChar('_', query_id)[5], 'UInt64') AS mutation_version, -- '5521485f-8a40-4aba-87a2-00342c369563::all_3_3_0_6' + sum(message LIKE 'Created Set with % entries%') >= 1 AS has_parts_for_which_set_was_built, + sum(message LIKE 'Got set from cache%') >= 1 AS has_parts_that_shared_set +FROM system.text_log +WHERE + query_id LIKE concat(CAST(table_uuid, 'String'), '::all\\_%') + AND (event_date >= yesterday()) + AND (message LIKE 'Created Set with % entries%' OR message LIKE 'Got set from cache%') +GROUP BY mutation_version ORDER BY mutation_version FORMAT TSVWithNames; + DROP TABLE 02581_trips; diff --git a/tests/queries/0_stateless/02680_mysql_ast_logical_err.sql b/tests/queries/0_stateless/02680_mysql_ast_logical_err.sql index bde91df83ca..78ce1b68b0d 100644 --- a/tests/queries/0_stateless/02680_mysql_ast_logical_err.sql +++ b/tests/queries/0_stateless/02680_mysql_ast_logical_err.sql @@ -1,4 +1,10 @@ CREATE TABLE foo (key UInt32, a String, b Int64, c String) ENGINE = TinyLog; -SELECT count() FROM mysql(mysql('127.0.0.1:9004', currentDatabase(), 'foo', 'default', ''), '127.0.0.1:9004', currentDatabase(), 'foo', '', ''); -- { serverError UNKNOWN_FUNCTION } -SELECT count() FROM mysql(mysql('127.0.0.1:9004', currentDatabase(), 'foo', 'default', '', SETTINGS connection_pool_size = 1), '127.0.0.1:9004', currentDatabase(), 'foo', '', ''); -- { serverError UNKNOWN_FUNCTION, UNSUPPORTED_METHOD } +SELECT count() FROM mysql( + mysql('127.0.0.1:9004', currentDatabase(), 'foo', 'default', ''), + '127.0.0.1:9004', currentDatabase(), 'foo', '', '', + SETTINGS connect_timeout = 100, connection_wait_timeout = 100, read_write_timeout = 300); -- { serverError UNKNOWN_FUNCTION } +SELECT count() FROM mysql( + mysql('127.0.0.1:9004', currentDatabase(), 'foo', 'default', '', SETTINGS connection_pool_size = 1), + '127.0.0.1:9004', currentDatabase(), 'foo', '', '', + SETTINGS connect_timeout = 100, connection_wait_timeout = 100, read_write_timeout = 300); -- { serverError UNKNOWN_FUNCTION, UNSUPPORTED_METHOD } diff --git a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference index d00491fd7e5..6ed281c757a 100644 --- a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference +++ b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference @@ -1 +1,2 @@ 1 +1 diff --git a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql index 13dfb5debe7..6aa70a379c1 100644 --- a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql +++ b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql @@ -6,3 +6,15 @@ ASOF LEFT JOIN ( select 1 as session_id, 4 as id ) as visitors ON visitors.session_id <= sessions.id AND arrayFirst(a -> a, arrayMap((a) -> a, sessions.arr)) = visitors.id +; + +select count(*) +from ( + select 1 as id, [1, 2, 3] as arr +) as sessions +ASOF LEFT JOIN ( + select 1 as session_id, 4 as id +) as visitors +ON visitors.session_id <= sessions.id AND arrayFirst(a -> a, arrayMap((a) -> a, sessions.arr)) = visitors.id +SETTINGS join_algorithm = 'full_sorting_merge' +; diff --git a/tests/queries/0_stateless/02726_async_insert_flush_queue.sql b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql index e393d96fc40..97d644fa4d6 100644 --- a/tests/queries/0_stateless/02726_async_insert_flush_queue.sql +++ b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql @@ -11,9 +11,13 @@ SET async_insert_use_adaptive_busy_timeout=0; SET async_insert_busy_timeout_max_ms = 10000000; INSERT INTO t_async_inserts_flush VALUES (1) (2); + INSERT INTO t_async_inserts_flush FORMAT JSONEachRow {"a": 10} {"a": 20}; -INSERT INTO t_async_inserts_flush FORMAT JSONEachRow {"a": "str"} -INSERT INTO t_async_inserts_flush FORMAT JSONEachRow {"a": 100} {"a": 200} + +INSERT INTO t_async_inserts_flush FORMAT JSONEachRow {"a": "str"}; + +INSERT INTO t_async_inserts_flush FORMAT JSONEachRow {"a": 100} {"a": 200}; + INSERT INTO t_async_inserts_flush VALUES (3) (4) (5); SELECT sleep(1) FORMAT Null; diff --git a/tests/queries/0_stateless/02751_multiquery_with_argument.reference b/tests/queries/0_stateless/02751_multiquery_with_argument.reference index 4b0c199ed35..2e55712e49c 100644 --- a/tests/queries/0_stateless/02751_multiquery_with_argument.reference +++ b/tests/queries/0_stateless/02751_multiquery_with_argument.reference @@ -17,8 +17,6 @@ Bad arguments Bad arguments Bad arguments Bad arguments -Bad arguments -Bad arguments 320 317 Bad arguments diff --git a/tests/queries/0_stateless/02751_multiquery_with_argument.sh b/tests/queries/0_stateless/02751_multiquery_with_argument.sh index fea2978c116..7b959a3c3dc 100755 --- a/tests/queries/0_stateless/02751_multiquery_with_argument.sh +++ b/tests/queries/0_stateless/02751_multiquery_with_argument.sh @@ -22,14 +22,17 @@ $CLICKHOUSE_CLIENT --queries-file "queries.csv" --multiquery "SELECT 251;" 2>&1 $CLICKHOUSE_LOCAL -n "SELECT 301" $CLICKHOUSE_LOCAL -n "SELECT 302;" $CLICKHOUSE_LOCAL -n "SELECT 304;SELECT 305;" +# --multiquery and -n are obsolete by now and no-ops. +# The only exception is a single --multiquery "" $CLICKHOUSE_LOCAL --multiquery --multiquery 2>&1 | grep -o 'Bad arguments' $CLICKHOUSE_LOCAL -n --multiquery 2>&1 | grep -o 'Bad arguments' $CLICKHOUSE_LOCAL --multiquery -n 2>&1 | grep -o 'Bad arguments' $CLICKHOUSE_LOCAL --multiquery --multiquery "SELECT 306; SELECT 307;" 2>&1 | grep -o 'Bad arguments' $CLICKHOUSE_LOCAL -n --multiquery "SELECT 307; SELECT 308;" 2>&1 | grep -o 'Bad arguments' -$CLICKHOUSE_LOCAL --multiquery "SELECT 309; SELECT 310;" --multiquery 2>&1 | grep -o 'Bad arguments' +$CLICKHOUSE_LOCAL --multiquery "SELECT 309; SELECT 310;" --multiquery 2>&1 | grep -o 'Bad arguments' $CLICKHOUSE_LOCAL --multiquery "SELECT 311;" --multiquery "SELECT 312;" 2>&1 | grep -o 'Bad arguments' $CLICKHOUSE_LOCAL --multiquery "SELECT 313;" -n "SELECT 314;" 2>&1 | grep -o 'Bad arguments' $CLICKHOUSE_LOCAL -n "SELECT 320" --query "SELECT 317;" +# --query should be followed by SQL $CLICKHOUSE_LOCAL --query -n "SELECT 400;" 2>&1 | grep -o 'Bad arguments' $CLICKHOUSE_LOCAL --query -n --multiquery "SELECT 401;" 2>&1 | grep -o 'Bad arguments' diff --git a/tests/queries/0_stateless/02771_multiple_query_arguments.reference b/tests/queries/0_stateless/02771_multiple_query_arguments.reference index 0c008e96965..b98ad7086a9 100644 --- a/tests/queries/0_stateless/02771_multiple_query_arguments.reference +++ b/tests/queries/0_stateless/02771_multiple_query_arguments.reference @@ -2,15 +2,17 @@ 101 202 202 -Multi-statements are not allowed -Empty query +303 +303 +303 Bad arguments Syntax error 101 101 202 202 -Multi-statements are not allowed -Empty query +303 +303 +303 Bad arguments Syntax error diff --git a/tests/queries/0_stateless/02771_multiple_query_arguments.sh b/tests/queries/0_stateless/02771_multiple_query_arguments.sh index 2958c9e9a72..ae6e23eb61a 100755 --- a/tests/queries/0_stateless/02771_multiple_query_arguments.sh +++ b/tests/queries/0_stateless/02771_multiple_query_arguments.sh @@ -7,15 +7,15 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # clickhouse-client $CLICKHOUSE_CLIENT --query "SELECT 101" --query "SELECT 101" $CLICKHOUSE_CLIENT --query "SELECT 202;" --query "SELECT 202;" -$CLICKHOUSE_CLIENT --query "SELECT 303" --query "SELECT 303; SELECT 303" 2>&1 | grep -o 'Multi-statements are not allowed' -$CLICKHOUSE_CLIENT --query "" --query "" 2>&1 | grep -o 'Empty query' +$CLICKHOUSE_CLIENT --query "SELECT 303" --query "SELECT 303; SELECT 303" +$CLICKHOUSE_CLIENT --query "" --query "" 2>&1 $CLICKHOUSE_CLIENT --query "SELECT 303" --query 2>&1 | grep -o 'Bad arguments' $CLICKHOUSE_CLIENT --query "SELECT 303" --query "SELE" 2>&1 | grep -o 'Syntax error' # clickhouse-local $CLICKHOUSE_LOCAL --query "SELECT 101" --query "SELECT 101" $CLICKHOUSE_LOCAL --query "SELECT 202;" --query "SELECT 202;" -$CLICKHOUSE_LOCAL --query "SELECT 303" --query "SELECT 303; SELECT 303" 2>&1 | grep -o 'Multi-statements are not allowed' # behaves differently than clickhouse-client, TODO make it consistent -$CLICKHOUSE_LOCAL --query "" --query "" 2>&1 | grep -o 'Empty query' # behaves equally different than clickhouse-client TODO +$CLICKHOUSE_LOCAL --query "SELECT 303" --query "SELECT 303; SELECT 303" +$CLICKHOUSE_LOCAL --query "" --query "" $CLICKHOUSE_LOCAL --query "SELECT 303" --query 2>&1 | grep -o 'Bad arguments' $CLICKHOUSE_LOCAL --query "SELECT 303" --query "SELE" 2>&1 | grep -o 'Syntax error' diff --git a/tests/queries/0_stateless/02805_distributed_queries_timeouts.reference b/tests/queries/0_stateless/02805_distributed_queries_timeouts.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql b/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql new file mode 100644 index 00000000000..f6bccc99977 --- /dev/null +++ b/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql @@ -0,0 +1,4 @@ +create table dist as system.one engine=Distributed(test_shard_localhost, system, one); +select sleep(8) from dist settings function_sleep_max_microseconds_per_block=8e9, prefer_localhost_replica=0, receive_timeout=7, async_socket_for_remote=0, use_hedged_requests=1 format Null; +select sleep(8) from dist settings function_sleep_max_microseconds_per_block=8e9, prefer_localhost_replica=0, receive_timeout=7, async_socket_for_remote=1, use_hedged_requests=0 format Null; +select sleep(8) from dist settings function_sleep_max_microseconds_per_block=8e9, prefer_localhost_replica=0, receive_timeout=7, async_socket_for_remote=0, use_hedged_requests=0 format Null; diff --git a/tests/queries/0_stateless/02833_concurrrent_sessions.reference b/tests/queries/0_stateless/02833_concurrent_sessions.reference similarity index 84% rename from tests/queries/0_stateless/02833_concurrrent_sessions.reference rename to tests/queries/0_stateless/02833_concurrent_sessions.reference index bfe507e8eac..904e8cefc6e 100644 --- a/tests/queries/0_stateless/02833_concurrrent_sessions.reference +++ b/tests/queries/0_stateless/02833_concurrent_sessions.reference @@ -1,34 +1,34 @@ sessions: -150 +45 port_0_sessions: 0 address_0_sessions: 0 tcp_sessions -60 +18 http_sessions -30 +9 http_with_session_id_sessions -30 -my_sql_sessions -30 +9 +mysql_sessions +9 Corresponding LoginSuccess/Logout -10 +3 LoginFailure -10 +3 Corresponding LoginSuccess/Logout -10 +3 LoginFailure -10 +3 Corresponding LoginSuccess/Logout -10 +3 LoginFailure -10 +3 Corresponding LoginSuccess/Logout -10 +3 LoginFailure -10 +3 Corresponding LoginSuccess/Logout -10 +3 LoginFailure -10 +3 diff --git a/tests/queries/0_stateless/02833_concurrrent_sessions.sh b/tests/queries/0_stateless/02833_concurrent_sessions.sh similarity index 88% rename from tests/queries/0_stateless/02833_concurrrent_sessions.sh rename to tests/queries/0_stateless/02833_concurrent_sessions.sh index c5b6204529b..846661cfeed 100755 --- a/tests/queries/0_stateless/02833_concurrrent_sessions.sh +++ b/tests/queries/0_stateless/02833_concurrent_sessions.sh @@ -37,11 +37,11 @@ done # These functions try to create a session with successful login and logout. # Sleep a small, random amount of time to make concurrency more intense. # and try to login with an invalid password. -function tcp_session() +function tcp_session() { local user=$1 local i=0 - while (( (i++) < 10 )); do + while (( (i++) < 3 )); do # login logout ${CLICKHOUSE_CLIENT} -q "SELECT 1, sleep(0.01${RANDOM})" --user="${user}" --password="pass" # login failure @@ -49,11 +49,11 @@ function tcp_session() done } -function http_session() +function http_session() { local user=$1 local i=0 - while (( (i++) < 10 )); do + while (( (i++) < 3 )); do # login logout ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=pass" -d "SELECT 3, sleep(0.01${RANDOM})" @@ -62,11 +62,11 @@ function http_session() done } -function http_with_session_id_session() +function http_with_session_id_session() { local user=$1 local i=0 - while (( (i++) < 10 )); do + while (( (i++) < 3 )); do # login logout ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${user}&user=${user}&password=pass" -d "SELECT 5, sleep 0.01${RANDOM}" @@ -75,11 +75,11 @@ function http_with_session_id_session() done } -function mysql_session() +function mysql_session() { local user=$1 local i=0 - while (( (i++) < 10 )); do + while (( (i++) < 3 )); do # login logout ${CLICKHOUSE_CLIENT} -q "SELECT 1, sleep(0.01${RANDOM}) FROM mysql('127.0.0.1:9004', 'system', 'one', '${user}', 'pass')" @@ -97,29 +97,29 @@ export -f http_with_session_id_session; export -f mysql_session; for user in "${TCP_USERS[@]}"; do - timeout 60s bash -c "tcp_session ${user}" >/dev/null 2>&1 & + tcp_session ${user} >/dev/null 2>&1 & done for user in "${HTTP_USERS[@]}"; do - timeout 60s bash -c "http_session ${user}" >/dev/null 2>&1 & + http_session ${user} >/dev/null 2>&1 & done for user in "${HTTP_WITH_SESSION_ID_SESSION_USERS[@]}"; do - timeout 60s bash -c "http_with_session_id_session ${user}" >/dev/null 2>&1 & + http_with_session_id_session ${user} >/dev/null 2>&1 & done for user in "${MYSQL_USERS[@]}"; do - timeout 60s bash -c "mysql_session ${user}" >/dev/null 2>&1 & + mysql_session ${user} >/dev/null 2>&1 & done wait ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" -echo "sessions:" +echo "sessions:" ${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING})" -echo "port_0_sessions:" +echo "port_0_sessions:" ${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING}) AND client_port = 0" echo "address_0_sessions:" @@ -131,13 +131,13 @@ echo "http_sessions" ${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${HTTP_USERS_SQL_COLLECTION_STRING}) AND interface = 'HTTP'" echo "http_with_session_id_sessions" ${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${HTTP_WITH_SESSION_ID_USERS_SQL_COLLECTION_STRING}) AND interface = 'HTTP'" -echo "my_sql_sessions" +echo "mysql_sessions" ${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${MYSQL_USERS_SQL_COLLECTION_STRING}) AND interface = 'MySQL'" for user in "${ALL_USERS[@]}"; do ${CLICKHOUSE_CLIENT} -q "DROP USER ${user}" - echo "Corresponding LoginSuccess/Logout" + echo "Corresponding LoginSuccess/Logout" ${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM (SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${user}' AND type = 'LoginSuccess' INTERSECT SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${user}' AND type = 'Logout')" echo "LoginFailure" - ${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM system.session_log WHERE user = '${user}' AND type = 'LoginFailure'" + ${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM system.session_log WHERE user = '${user}' AND type = 'LoginFailure'" done diff --git a/tests/queries/0_stateless/02842_truncate_database.reference b/tests/queries/0_stateless/02842_truncate_database.reference index 71f52bcd1da..bc8c0210d27 100644 --- a/tests/queries/0_stateless/02842_truncate_database.reference +++ b/tests/queries/0_stateless/02842_truncate_database.reference @@ -20,3 +20,5 @@ source_table_stripe_log source_table_tiny_log === DICTIONARIES IN test_truncate_database === dest_dictionary +new tables +new_table diff --git a/tests/queries/0_stateless/02842_truncate_database.sql b/tests/queries/0_stateless/02842_truncate_database.sql index 09ac844cfe2..bcd818f55ba 100644 --- a/tests/queries/0_stateless/02842_truncate_database.sql +++ b/tests/queries/0_stateless/02842_truncate_database.sql @@ -73,4 +73,8 @@ SELECT * FROM dest_dictionary; -- {serverError UNKNOWN_TABLE} SHOW TABLES FROM test_truncate_database; SHOW DICTIONARIES FROM test_truncate_database; +CREATE TABLE new_table (x UInt16) ENGINE = MergeTree ORDER BY x; +select 'new tables'; +SHOW TABLES FROM test_truncate_database; + DROP DATABASE test_truncate_database; diff --git a/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference b/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference new file mode 100644 index 00000000000..02c41656a36 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference @@ -0,0 +1,14 @@ +CREATE TABLE default.tab\n(\n `a` String,\n `b` UInt64,\n `c` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +Test statistics count_min: + Prewhere info + Prewhere filter + Prewhere filter column: and(equals(a, \'0\'), equals(b, 0), equals(c, 0)) (removed) +Test statistics multi-types: + Prewhere info + Prewhere filter + Prewhere filter column: and(equals(a, \'0\'), less(c, -90), greater(b, 900)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0)) (removed) +Test LowCardinality and Nullable data type: +tab2 diff --git a/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql b/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql new file mode 100644 index 00000000000..c730aa7b4a7 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql @@ -0,0 +1,70 @@ +-- Tags: no-fasttest + +DROP TABLE IF EXISTS tab SYNC; + +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; +SET allow_suspicious_low_cardinality_types=1; +SET mutations_sync = 2; + +CREATE TABLE tab +( + a String, + b UInt64, + c Int64, + pk String, +) Engine = MergeTree() ORDER BY pk +SETTINGS min_bytes_for_wide_part = 0; + +SHOW CREATE TABLE tab; + +INSERT INTO tab select toString(number % 10000), number % 1000, -(number % 100), generateUUIDv4() FROM system.numbers LIMIT 10000; + +SELECT 'Test statistics count_min:'; + +ALTER TABLE tab ADD STATISTICS a TYPE count_min; +ALTER TABLE tab ADD STATISTICS b TYPE count_min; +ALTER TABLE tab ADD STATISTICS c TYPE count_min; +ALTER TABLE tab MATERIALIZE STATISTICS a, b, c; + +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c = 0/*100*/ and b = 0/*10*/ and a = '0'/*1*/) xx +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; + +ALTER TABLE tab DROP STATISTICS a, b, c; + + +SELECT 'Test statistics multi-types:'; + +ALTER TABLE tab ADD STATISTICS a TYPE count_min; +ALTER TABLE tab ADD STATISTICS b TYPE count_min, uniq, tdigest; +ALTER TABLE tab ADD STATISTICS c TYPE count_min, uniq, tdigest; +ALTER TABLE tab MATERIALIZE STATISTICS a, b, c; + +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < -90/*900*/ and b > 900/*990*/ and a = '0'/*1*/) +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; + +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 0/*9900*/ and b = 0/*10*/ and a = '10000'/*0*/) +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; + +ALTER TABLE tab DROP STATISTICS a, b, c; + +DROP TABLE IF EXISTS tab SYNC; + + +SELECT 'Test LowCardinality and Nullable data type:'; +DROP TABLE IF EXISTS tab2 SYNC; +SET allow_suspicious_low_cardinality_types=1; +CREATE TABLE tab2 +( + a LowCardinality(Int64) STATISTICS(count_min), + b Nullable(Int64) STATISTICS(count_min), + c LowCardinality(Nullable(Int64)) STATISTICS(count_min), + pk String, +) Engine = MergeTree() ORDER BY pk; + +select name from system.tables where name = 'tab2' and database = currentDatabase(); + +DROP TABLE IF EXISTS tab2 SYNC; diff --git a/tests/queries/0_stateless/02864_statistics_uniq.sql b/tests/queries/0_stateless/02864_statistics_uniq.sql index d496392668b..0f5f353c045 100644 --- a/tests/queries/0_stateless/02864_statistics_uniq.sql +++ b/tests/queries/0_stateless/02864_statistics_uniq.sql @@ -70,3 +70,4 @@ SETTINGS min_bytes_for_wide_part = 0; INSERT INTO t3 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000; DROP TABLE IF EXISTS t3; + diff --git a/tests/queries/0_stateless/02875_clickhouse_local_multiquery.reference b/tests/queries/0_stateless/02875_clickhouse_local_multiquery.reference index d99881dfa6b..1cc8f10a4cf 100644 --- a/tests/queries/0_stateless/02875_clickhouse_local_multiquery.reference +++ b/tests/queries/0_stateless/02875_clickhouse_local_multiquery.reference @@ -1,5 +1,7 @@ -Multi-statements are not allowed -Multi-statements are not allowed +1 +2 +1 +2 1 2 1 diff --git a/tests/queries/0_stateless/02875_clickhouse_local_multiquery.sh b/tests/queries/0_stateless/02875_clickhouse_local_multiquery.sh index e2dab337dc2..3f2b732e71b 100755 --- a/tests/queries/0_stateless/02875_clickhouse_local_multiquery.sh +++ b/tests/queries/0_stateless/02875_clickhouse_local_multiquery.sh @@ -4,10 +4,11 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# throw exception -$CLICKHOUSE_CLIENT -q "select 1; select 2;" 2>&1 | grep -o 'Multi-statements are not allowed' -$CLICKHOUSE_LOCAL -q "select 1; select 2;" 2>&1 | grep -o 'Multi-statements are not allowed' -# execute correctly +# clickhouse-local and clickhouse-client behave the same +$CLICKHOUSE_CLIENT -q "select 1; select 2;" +$CLICKHOUSE_LOCAL -q "select 1; select 2;" + +# -n is a no-op $CLICKHOUSE_CLIENT -n -q "select 1; select 2;" $CLICKHOUSE_LOCAL -n -q "select 1; select 2;" diff --git a/tests/queries/0_stateless/02898_input_format_values_allow_data_after_semicolon.reference b/tests/queries/0_stateless/02898_input_format_values_allow_data_after_semicolon.reference index 250a673a26b..e69de29bb2d 100644 --- a/tests/queries/0_stateless/02898_input_format_values_allow_data_after_semicolon.reference +++ b/tests/queries/0_stateless/02898_input_format_values_allow_data_after_semicolon.reference @@ -1,6 +0,0 @@ -client no multiquery -Cannot read data after semicolon (and input_format_values_allow_data_after_semicolon=0) -client multiquery -local no multiquery -Cannot read data after semicolon (and input_format_values_allow_data_after_semicolon=0) -local multiquery diff --git a/tests/queries/0_stateless/02898_input_format_values_allow_data_after_semicolon.sh b/tests/queries/0_stateless/02898_input_format_values_allow_data_after_semicolon.sh index 8164c91b2ae..2b9ac6bfd54 100755 --- a/tests/queries/0_stateless/02898_input_format_values_allow_data_after_semicolon.sh +++ b/tests/queries/0_stateless/02898_input_format_values_allow_data_after_semicolon.sh @@ -4,12 +4,5 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -echo "client no multiquery" -$CLICKHOUSE_CLIENT -q "insert into function null() values (1); -- { foo }" |& grep -F -o "Cannot read data after semicolon (and input_format_values_allow_data_after_semicolon=0)" -echo "client multiquery" -$CLICKHOUSE_CLIENT -n -q "insert into function null() values (1); -- { foo }" - -echo "local no multiquery" -$CLICKHOUSE_LOCAL -q "insert into function null() values (1); -- { foo }" |& grep -F -o "Cannot read data after semicolon (and input_format_values_allow_data_after_semicolon=0)" -echo "local multiquery" -$CLICKHOUSE_LOCAL -n -q "insert into function null() values (1); -- { foo }" +$CLICKHOUSE_CLIENT -q "insert into function null() values (1); -- { foo }" +$CLICKHOUSE_LOCAL -q "insert into function null() values (1); -- { foo }" diff --git a/tests/queries/0_stateless/02900_window_function_with_sparse_column.sql b/tests/queries/0_stateless/02900_window_function_with_sparse_column.sql index 0b053d3e870..6919e23ad1f 100644 --- a/tests/queries/0_stateless/02900_window_function_with_sparse_column.sql +++ b/tests/queries/0_stateless/02900_window_function_with_sparse_column.sql @@ -18,7 +18,7 @@ ENGINE = MergeTree PARTITION BY toYYYYMM(time) ORDER BY (key, id, time); -INSERT INTO test1 VALUES ('id0', now(), 3, false) +INSERT INTO test1 VALUES ('id0', now(), 3, false); SELECT last_value(value) OVER (PARTITION BY id ORDER BY time ASC) as last_value FROM test1 diff --git a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference index f376bb87044..fdefd2e3466 100644 --- a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference +++ b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference @@ -1,10 +1,10 @@ -Creating 300 tables -900 1 1 -900 1 1 -900 1 1 -900 1 1 -Making 200 requests to system.replicas +Creating 50 tables +150 1 1 +150 1 1 +150 1 1 +150 1 1 +Making 100 requests to system.replicas Query system.replicas while waiting for other concurrent requests to finish 0 -900 +150 1 diff --git a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh index a247c99a818..81ba59fc591 100755 --- a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh +++ b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh @@ -7,8 +7,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -NUM_TABLES=300 -CONCURRENCY=200 +NUM_TABLES=50 +CONCURRENCY=100 echo "Creating $NUM_TABLES tables" @@ -46,10 +46,10 @@ wait; # Check results with different max_block_size -$CLICKHOUSE_CLIENT -q 'SELECT count(), sum(total_replicas) >= 2700, sum(active_replicas) >= 2700 FROM system.replicas WHERE database=currentDatabase()' -$CLICKHOUSE_CLIENT -q 'SELECT count(), sum(total_replicas) >= 2700, sum(active_replicas) >= 2700 FROM system.replicas WHERE database=currentDatabase() SETTINGS max_block_size=1' -$CLICKHOUSE_CLIENT -q 'SELECT count(), sum(total_replicas) >= 2700, sum(active_replicas) >= 2700 FROM system.replicas WHERE database=currentDatabase() SETTINGS max_block_size=77' -$CLICKHOUSE_CLIENT -q 'SELECT count(), sum(total_replicas) >= 2700, sum(active_replicas) >= 2700 FROM system.replicas WHERE database=currentDatabase() SETTINGS max_block_size=11111' +$CLICKHOUSE_CLIENT -q 'SELECT count() as c, sum(total_replicas) >= 3*c, sum(active_replicas) >= 3*c FROM system.replicas WHERE database=currentDatabase()' +$CLICKHOUSE_CLIENT -q 'SELECT count() as c, sum(total_replicas) >= 3*c, sum(active_replicas) >= 3*c FROM system.replicas WHERE database=currentDatabase() SETTINGS max_block_size=1' +$CLICKHOUSE_CLIENT -q 'SELECT count() as c, sum(total_replicas) >= 3*c, sum(active_replicas) >= 3*c FROM system.replicas WHERE database=currentDatabase() SETTINGS max_block_size=77' +$CLICKHOUSE_CLIENT -q 'SELECT count() as c, sum(total_replicas) >= 3*c, sum(active_replicas) >= 3*c FROM system.replicas WHERE database=currentDatabase() SETTINGS max_block_size=11111' echo "Making $CONCURRENCY requests to system.replicas" @@ -70,8 +70,8 @@ wait; $CLICKHOUSE_CLIENT -nq " SYSTEM FLUSH LOGS; --- without optimisation there are ~350K zk requests -SELECT sum(ProfileEvents['ZooKeeperTransactions']) < 30000 +-- Check that number of ZK request is less then a half of (total replicas * concurrency) +SELECT sum(ProfileEvents['ZooKeeperTransactions']) < (${NUM_TABLES} * 3 * ${CONCURRENCY} / 2) FROM system.query_log WHERE current_database=currentDatabase() AND log_comment='02908_many_requests'; " diff --git a/tests/queries/0_stateless/02911_backup_restore_keeper_map.sh b/tests/queries/0_stateless/02911_backup_restore_keeper_map.sh index ccdf52a6e23..ee070b40f6f 100755 --- a/tests/queries/0_stateless/02911_backup_restore_keeper_map.sh +++ b/tests/queries/0_stateless/02911_backup_restore_keeper_map.sh @@ -11,11 +11,23 @@ $CLICKHOUSE_CLIENT -nm -q " CREATE TABLE $database_name.02911_backup_restore_keeper_map1 (key UInt64, value String) Engine=KeeperMap('/' || currentDatabase() || '/test02911') PRIMARY KEY key; CREATE TABLE $database_name.02911_backup_restore_keeper_map2 (key UInt64, value String) Engine=KeeperMap('/' || currentDatabase() || '/test02911') PRIMARY KEY key; -- table using same Keeper path as 02911_backup_restore_keeper_map1 CREATE TABLE $database_name.02911_backup_restore_keeper_map3 (key UInt64, value String) Engine=KeeperMap('/' || currentDatabase() || '/test02911_different') PRIMARY KEY key; - - INSERT INTO $database_name.02911_backup_restore_keeper_map2 SELECT number, 'test' || toString(number) FROM system.numbers LIMIT 5000; - INSERT INTO $database_name.02911_backup_restore_keeper_map3 SELECT number, 'test' || toString(number) FROM system.numbers LIMIT 3000; " +# KeeperMap table engine doesn't have internal retries for interaction with Keeper. Do it on our own, otherwise tests with overloaded server can be flaky. +while true +do + $CLICKHOUSE_CLIENT -nm -q "INSERT INTO $database_name.02911_backup_restore_keeper_map2 SELECT number, 'test' || toString(number) FROM system.numbers LIMIT 5000; + " 2>&1 | grep -q "KEEPER_EXCEPTION" && sleep 1 && continue + break +done + +while true +do + $CLICKHOUSE_CLIENT -nm -q "INSERT INTO $database_name.02911_backup_restore_keeper_map3 SELECT number, 'test' || toString(number) FROM system.numbers LIMIT 3000; + " 2>&1 | grep -q "KEEPER_EXCEPTION" && sleep 1 && continue + break +done + backup_path="$database_name" for i in $(seq 1 3); do $CLICKHOUSE_CLIENT -q "SELECT count() FROM $database_name.02911_backup_restore_keeper_map$i;" @@ -45,4 +57,4 @@ for i in $(seq 1 3); do $CLICKHOUSE_CLIENT -q "SELECT count() FROM $database_name.02911_backup_restore_keeper_map$i;" done -$CLICKHOUSE_CLIENT -q "DROP DATABASE $database_name SYNC;" \ No newline at end of file +$CLICKHOUSE_CLIENT -q "DROP DATABASE $database_name SYNC;" diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference index 31a1cda18e7..352e02941c3 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference @@ -39,11 +39,22 @@ SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS N 2 2 2 2 3 3 3 33 \N \N \N \N +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND t2.x <> t1.x ) ORDER BY t1.x NULLS LAST; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND (t2.x IS NULL) AND (t2.x IS NULL) ) ORDER BY t1.x NULLS LAST; -- { serverError INVALID_JOIN_ON_EXPRESSION } -- aliases defined in the join condition are valid --- FIXME(@vdimir) broken query formatting for the following queries: --- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; --- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; - +SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; +1 42 \N \N \N 0 +2 2 2 2 1 1 +3 3 3 33 1 1 +\N \N 4 42 \N 0 +\N \N \N \N \N 1 +SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; +1 42 \N \N \N 0 +2 2 2 2 1 1 +3 3 3 33 1 1 +\N \N 4 42 \N 0 +\N \N \N \N \N 0 +\N \N \N \N \N 0 -- check for non-nullable columns for which `is null` is replaced with constant SELECT * FROM t1n as t1 JOIN t2n as t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; 2 2 2 2 diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql index f739259caf9..ee5e1582015 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql @@ -35,11 +35,12 @@ SELECT x = y OR (x IS NULL AND y IS NULL) FROM t1 ORDER BY x NULLS LAST; SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND t2.x <> t1.x ) ORDER BY t1.x NULLS LAST; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND (t2.x IS NULL) AND (t2.x IS NULL) ) ORDER BY t1.x NULLS LAST; -- { serverError INVALID_JOIN_ON_EXPRESSION } -- aliases defined in the join condition are valid --- FIXME(@vdimir) broken query formatting for the following queries: --- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; --- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; +SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; +SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; -- check for non-nullable columns for which `is null` is replaced with constant SELECT * FROM t1n as t1 JOIN t2n as t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; diff --git a/tests/queries/0_stateless/02930_client_file_log_comment.sh b/tests/queries/0_stateless/02930_client_file_log_comment.sh index 3d0df6de9b4..50cd587e4b5 100755 --- a/tests/queries/0_stateless/02930_client_file_log_comment.sh +++ b/tests/queries/0_stateless/02930_client_file_log_comment.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment +# reset --log_comment, because the test has to set its own CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02932_refreshable_materialized_views.sh b/tests/queries/0_stateless/02932_refreshable_materialized_views.sh index 89942e25b67..6ffc4064413 100755 --- a/tests/queries/0_stateless/02932_refreshable_materialized_views.sh +++ b/tests/queries/0_stateless/02932_refreshable_materialized_views.sh @@ -2,8 +2,6 @@ # Tags: atomic-database CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh @@ -134,7 +132,7 @@ while [ "`$CLICKHOUSE_CLIENT -nq "select status, next_refresh_time from refreshe do sleep 0.1 done -sleep 1 + $CLICKHOUSE_CLIENT -nq " select '<14: waiting for next cycle>', view, status, remaining_dependencies, next_refresh_time from refreshes; truncate src; @@ -172,13 +170,13 @@ $CLICKHOUSE_CLIENT -nq " drop table b; create materialized view c refresh every 1 second (x Int64) engine Memory empty as select * from src; drop table src;" -while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Exception' ] +while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes where view = 'c' -- $LINENO" | xargs`" != 'Exception' ] do sleep 0.1 done # Check exception, create src, expect successful refresh. $CLICKHOUSE_CLIENT -nq " - select '<19: exception>', exception ilike '%UNKNOWN_TABLE%' from refreshes; + select '<19: exception>', exception ilike '%UNKNOWN_TABLE%' ? '1' : exception from refreshes where view = 'c'; create table src (x Int64) engine Memory as select 1; system refresh view c;" while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Finished' ] @@ -197,7 +195,7 @@ $CLICKHOUSE_CLIENT -nq " $CLICKHOUSE_CLIENT -nq " drop table d; truncate src; - insert into src values (1) + insert into src values (1); create materialized view e refresh every 1 second (x Int64) engine MergeTree order by x empty as select x + sleepEachRow(1) as x from src settings max_block_size = 1;" while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Finished' ] do @@ -215,6 +213,7 @@ done # we wait for a slow refresh, not a previous fast one.) $CLICKHOUSE_CLIENT -nq " insert into src select * from numbers(1000) settings max_block_size=1; + system start view e;" while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes -- $LINENO" | xargs`" != 'Running' ] do @@ -224,22 +223,27 @@ done $CLICKHOUSE_CLIENT -nq " rename table e to f; select '<24: rename during refresh>', * from f; - select '<25: rename during refresh>', view, status from refreshes; + select '<25: rename during refresh>', view, status from refreshes where view = 'f'; alter table f modify refresh after 10 year;" -sleep 2 # make it likely that at least one row was processed + # Cancel. $CLICKHOUSE_CLIENT -nq " system cancel view f;" -while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Cancelled' ] +while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes where view = 'f' -- $LINENO" | xargs`" != 'Cancelled' ] do sleep 0.1 done + +while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes where view = 'f' -- $LINENO" | xargs`" = 'Running' ] +do + sleep 0.1 +done + # Check that another refresh doesn't immediately start after the cancelled one. -sleep 1 $CLICKHOUSE_CLIENT -nq " - select '<27: cancelled>', view, status from refreshes; + select '<27: cancelled>', view, status from refreshes where view = 'f'; system refresh view f;" -while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes -- $LINENO" | xargs`" != 'Running' ] +while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes where view = 'f' -- $LINENO" | xargs`" != 'Running' ] do sleep 0.1 done diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference index 6ed281c757a..98fb6a68656 100644 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference @@ -1,2 +1,4 @@ 1 1 +1 +1 diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh index 15f169d880f..b587549cb60 100755 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh @@ -18,8 +18,12 @@ ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIAL ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" -# But it is allowed with the special setting -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" --database_replicated_allow_heavy_create=1 +# POPULATE is allowed with the special setting ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" --database_replicated_allow_heavy_create=1 +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv3 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" --compatibility='24.6' + +# AS SELECT is forbidden even with the setting +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" --database_replicated_allow_heavy_create=1 |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" --compatibility='24.6' |& grep -cm1 "SUPPORT_IS_DISABLED" ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" diff --git a/tests/queries/0_stateless/02941_variant_type_1.sh b/tests/queries/0_stateless/02941_variant_type_1.sh index 723de45eaad..c12bece6d54 100755 --- a/tests/queries/0_stateless/02941_variant_type_1.sh +++ b/tests/queries/0_stateless/02941_variant_type_1.sh @@ -2,8 +2,6 @@ # Tags: long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02941_variant_type_2.sh b/tests/queries/0_stateless/02941_variant_type_2.sh index 8453bce98dc..e93dfac8510 100755 --- a/tests/queries/0_stateless/02941_variant_type_2.sh +++ b/tests/queries/0_stateless/02941_variant_type_2.sh @@ -2,8 +2,6 @@ # Tags: long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02941_variant_type_3.sh b/tests/queries/0_stateless/02941_variant_type_3.sh index 990eb25b5be..cc0fde5b689 100755 --- a/tests/queries/0_stateless/02941_variant_type_3.sh +++ b/tests/queries/0_stateless/02941_variant_type_3.sh @@ -2,8 +2,6 @@ # Tags: long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02941_variant_type_4.sh b/tests/queries/0_stateless/02941_variant_type_4.sh index b8f619694b0..93a1770d05e 100755 --- a/tests/queries/0_stateless/02941_variant_type_4.sh +++ b/tests/queries/0_stateless/02941_variant_type_4.sh @@ -2,8 +2,6 @@ # Tags: long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02941_variant_type_alters.sh b/tests/queries/0_stateless/02941_variant_type_alters.sh index 7c151d1fe9e..0b3ab8e925b 100755 --- a/tests/queries/0_stateless/02941_variant_type_alters.sh +++ b/tests/queries/0_stateless/02941_variant_type_alters.sh @@ -2,8 +2,6 @@ # Tags: long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02943_variant_read_subcolumns.sh b/tests/queries/0_stateless/02943_variant_read_subcolumns.sh index 5ca8dd5f36f..21ab877eaf3 100755 --- a/tests/queries/0_stateless/02943_variant_read_subcolumns.sh +++ b/tests/queries/0_stateless/02943_variant_read_subcolumns.sh @@ -2,8 +2,6 @@ # Tags: long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh @@ -34,4 +32,3 @@ echo "MergeTree wide" $CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" test $CH_CLIENT -q "drop table test;" - diff --git a/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh index 1d88757a5d6..4aa4b11cecb 100755 --- a/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh +++ b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh @@ -2,8 +2,6 @@ # Tags: long, no-debug CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02950_dictionary_short_circuit.sql b/tests/queries/0_stateless/02950_dictionary_short_circuit.sql index 12c934a8d2d..c613709a975 100644 --- a/tests/queries/0_stateless/02950_dictionary_short_circuit.sql +++ b/tests/queries/0_stateless/02950_dictionary_short_circuit.sql @@ -241,12 +241,12 @@ CREATE TABLE regexp_dictionary_source_table values Array(String), ) ENGINE=TinyLog; -INSERT INTO regexp_dictionary_source_table VALUES (1, 0, 'Linux/(\d+[\.\d]*).+tlinux', ['name', 'version'], ['TencentOS', '\1']) -INSERT INTO regexp_dictionary_source_table VALUES (2, 0, '(\d+)/tclwebkit(\d+[\.\d]*)', ['name', 'version', 'comment'], ['Android', '$1', 'test $1 and $2']) -INSERT INTO regexp_dictionary_source_table VALUES (3, 2, '33/tclwebkit', ['version'], ['13']) -INSERT INTO regexp_dictionary_source_table VALUES (4, 2, '3[12]/tclwebkit', ['version'], ['12']) -INSERT INTO regexp_dictionary_source_table VALUES (5, 2, '3[12]/tclwebkit', ['version'], ['11']) -INSERT INTO regexp_dictionary_source_table VALUES (6, 2, '3[12]/tclwebkit', ['version'], ['10']) +INSERT INTO regexp_dictionary_source_table VALUES (1, 0, 'Linux/(\d+[\.\d]*).+tlinux', ['name', 'version'], ['TencentOS', '\1']); +INSERT INTO regexp_dictionary_source_table VALUES (2, 0, '(\d+)/tclwebkit(\d+[\.\d]*)', ['name', 'version', 'comment'], ['Android', '$1', 'test $1 and $2']); +INSERT INTO regexp_dictionary_source_table VALUES (3, 2, '33/tclwebkit', ['version'], ['13']); +INSERT INTO regexp_dictionary_source_table VALUES (4, 2, '3[12]/tclwebkit', ['version'], ['12']); +INSERT INTO regexp_dictionary_source_table VALUES (5, 2, '3[12]/tclwebkit', ['version'], ['11']); +INSERT INTO regexp_dictionary_source_table VALUES (6, 2, '3[12]/tclwebkit', ['version'], ['10']); DROP DICTIONARY IF EXISTS regexp_dict; create dictionary regexp_dict diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.reference b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.reference new file mode 100644 index 00000000000..e1bf9c27a81 --- /dev/null +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.reference @@ -0,0 +1,30 @@ + +simple join with analyzer +4200000 4200000 4200000 -1400000 +4200006 4200006 4200006 -1400002 +4200012 4200012 4200012 -1400004 +4200018 4200018 4200018 -1400006 +4200024 4200024 4200024 -1400008 +4200030 4200030 4200030 -1400010 +4200036 4200036 4200036 -1400012 +4200042 4200042 4200042 -1400014 +4200048 4200048 4200048 -1400016 +4200054 4200054 4200054 -1400018 + +simple (global) join with analyzer and parallel replicas +4200000 4200000 4200000 -1400000 +4200006 4200006 4200006 -1400002 +4200012 4200012 4200012 -1400004 +4200018 4200018 4200018 -1400006 +4200024 4200024 4200024 -1400008 +4200030 4200030 4200030 -1400010 +4200036 4200036 4200036 -1400012 +4200042 4200042 4200042 -1400014 +4200048 4200048 4200048 -1400016 +4200054 4200054 4200054 -1400018 +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` (stage: WithMergeableState) +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` (stage: WithMergeableState) + DefaultCoordinator: Coordination done +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) + DefaultCoordinator: Coordination done diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.sh b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.sh new file mode 100755 index 00000000000..1089eb4051f --- /dev/null +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# Tags: long, no-random-settings, no-random-merge-tree-settings + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT -nm -q " +drop table if exists num_1; +drop table if exists num_2; + +create table num_1 (key UInt64, value String) engine = MergeTree order by key; +create table num_2 (key UInt64, value Int64) engine = MergeTree order by key; + +insert into num_1 select number * 2, toString(number * 2) from numbers(1e7); +insert into num_2 select number * 3, -number from numbers(1.5e6); +" + +############## +echo +echo "simple join with analyzer" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2) r on l.key = r.key +order by l.key limit 10 offset 700000 +SETTINGS allow_experimental_analyzer=1" + +############## +echo +echo "simple (global) join with analyzer and parallel replicas" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2) r on l.key = r.key +order by l.key limit 10 offset 700000 +SETTINGS allow_experimental_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, +max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2) r on l.key = r.key +order by l.key limit 10 offset 700000 +SETTINGS allow_experimental_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, send_logs_level='trace', +max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 | +grep "executeQuery\|.*Coordinator: Coordination done" | +grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | +sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.reference b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.reference new file mode 100644 index 00000000000..297ec311f3e --- /dev/null +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.reference @@ -0,0 +1,57 @@ + +simple (local) join with analyzer and parallel replicas +4200000 4200000 4200000 -1400000 +4200006 4200006 4200006 -1400002 +4200012 4200012 4200012 -1400004 +4200018 4200018 4200018 -1400006 +4200024 4200024 4200024 -1400008 +4200030 4200030 4200030 -1400010 +4200036 4200036 4200036 -1400012 +4200042 4200042 4200042 -1400014 +4200048 4200048 4200048 -1400016 +4200054 4200054 4200054 -1400018 +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) + DefaultCoordinator: Coordination done + +simple (local) join with analyzer and parallel replicas and full sorting merge join +4200000 4200000 4200000 -1400000 +4200006 4200006 4200006 -1400002 +4200012 4200012 4200012 -1400004 +4200018 4200018 4200018 -1400006 +4200024 4200024 4200024 -1400008 +4200030 4200030 4200030 -1400010 +4200036 4200036 4200036 -1400012 +4200042 4200042 4200042 -1400014 +4200048 4200048 4200048 -1400016 +4200054 4200054 4200054 -1400018 +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) + WithOrderCoordinator: Coordination done + +nested join with analyzer +420000 420000 420000 -140000 +420042 420042 420042 -140014 +420084 420084 420084 -140028 +420126 420126 420126 -140042 +420168 420168 420168 -140056 +420210 420210 420210 -140070 +420252 420252 420252 -140084 +420294 420294 420294 -140098 +420336 420336 420336 -140112 +420378 420378 420378 -140126 + +nested join with analyzer and parallel replicas, both local +420000 420000 420000 -140000 +420042 420042 420042 -140014 +420084 420084 420084 -140028 +420126 420126 420126 -140042 +420168 420168 420168 -140056 +420210 420210 420210 -140070 +420252 420252 420252 -140084 +420294 420294 420294 -140098 +420336 420336 420336 -140112 +420378 420378 420378 -140126 +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4` ALL INNER JOIN (SELECT `__table6`.`number` * 7 AS `key` FROM numbers(100000.) AS `__table6`) AS `__table5` ON `__table4`.`key` = `__table5`.`key` SETTINGS parallel_replicas_prefer_local_join = 1) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4` ALL INNER JOIN (SELECT `__table6`.`number` * 7 AS `key` FROM numbers(100000.) AS `__table6`) AS `__table5` ON `__table4`.`key` = `__table5`.`key` SETTINGS parallel_replicas_prefer_local_join = 1) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) + WithOrderCoordinator: Coordination done diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.sh b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.sh new file mode 100755 index 00000000000..7a0e2d9bfdb --- /dev/null +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash +# Tags: long, no-random-settings, no-random-merge-tree-settings + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT -nm -q " +drop table if exists num_1; +drop table if exists num_2; + +create table num_1 (key UInt64, value String) engine = MergeTree order by key; +create table num_2 (key UInt64, value Int64) engine = MergeTree order by key; + +insert into num_1 select number * 2, toString(number * 2) from numbers(1e7); +insert into num_2 select number * 3, -number from numbers(1.5e6); +" + +############## +echo +echo "simple (local) join with analyzer and parallel replicas" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2) r on l.key = r.key +order by l.key limit 10 offset 700000 +SETTINGS allow_experimental_analyzer=1, +allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2) r on l.key = r.key +order by l.key limit 10 offset 700000 +SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', +allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | +grep "executeQuery\|.*Coordinator: Coordination done" | +grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | +sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' + + +############## +echo +echo "simple (local) join with analyzer and parallel replicas and full sorting merge join" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2) r on l.key = r.key +order by l.key limit 10 offset 700000 +SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', +allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2) r on l.key = r.key +order by l.key limit 10 offset 700000 +SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', +allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | +grep "executeQuery\|.*Coordinator: Coordination done" | +grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | +sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' + + +############## +echo +echo "nested join with analyzer" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2 inner join + (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r +on l.key = r.key order by l.key limit 10 offset 10000 +SETTINGS allow_experimental_analyzer=1" + + +############## +echo +echo "nested join with analyzer and parallel replicas, both local" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2 inner join + (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r +on l.key = r.key order by l.key limit 10 offset 10000 +SETTINGS allow_experimental_analyzer=1, +allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2 inner join + (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r +on l.key = r.key order by l.key limit 10 offset 10000 +SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', +allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | +grep "executeQuery\|.*Coordinator: Coordination done" | +grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | +sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.reference b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.reference similarity index 55% rename from tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.reference rename to tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.reference index d7fa419aeab..c0485b817c4 100644 --- a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.reference +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.reference @@ -1,91 +1,4 @@ -simple join with analyzer -4200000 4200000 4200000 -1400000 -4200006 4200006 4200006 -1400002 -4200012 4200012 4200012 -1400004 -4200018 4200018 4200018 -1400006 -4200024 4200024 4200024 -1400008 -4200030 4200030 4200030 -1400010 -4200036 4200036 4200036 -1400012 -4200042 4200042 4200042 -1400014 -4200048 4200048 4200048 -1400016 -4200054 4200054 4200054 -1400018 - -simple (global) join with analyzer and parallel replicas -4200000 4200000 4200000 -1400000 -4200006 4200006 4200006 -1400002 -4200012 4200012 4200012 -1400004 -4200018 4200018 4200018 -1400006 -4200024 4200024 4200024 -1400008 -4200030 4200030 4200030 -1400010 -4200036 4200036 4200036 -1400012 -4200042 4200042 4200042 -1400014 -4200048 4200048 4200048 -1400016 -4200054 4200054 4200054 -1400018 -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` (stage: WithMergeableState) - DefaultCoordinator: Coordination done -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) - DefaultCoordinator: Coordination done - -simple (local) join with analyzer and parallel replicas -4200000 4200000 4200000 -1400000 -4200006 4200006 4200006 -1400002 -4200012 4200012 4200012 -1400004 -4200018 4200018 4200018 -1400006 -4200024 4200024 4200024 -1400008 -4200030 4200030 4200030 -1400010 -4200036 4200036 4200036 -1400012 -4200042 4200042 4200042 -1400014 -4200048 4200048 4200048 -1400016 -4200054 4200054 4200054 -1400018 -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) - DefaultCoordinator: Coordination done - -simple (local) join with analyzer and parallel replicas and full sorting merge join -4200000 4200000 4200000 -1400000 -4200006 4200006 4200006 -1400002 -4200012 4200012 4200012 -1400004 -4200018 4200018 4200018 -1400006 -4200024 4200024 4200024 -1400008 -4200030 4200030 4200030 -1400010 -4200036 4200036 4200036 -1400012 -4200042 4200042 4200042 -1400014 -4200048 4200048 4200048 -1400016 -4200054 4200054 4200054 -1400018 -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) - WithOrderCoordinator: Coordination done - -nested join with analyzer -420000 420000 420000 -140000 -420042 420042 420042 -140014 -420084 420084 420084 -140028 -420126 420126 420126 -140042 -420168 420168 420168 -140056 -420210 420210 420210 -140070 -420252 420252 420252 -140084 -420294 420294 420294 -140098 -420336 420336 420336 -140112 -420378 420378 420378 -140126 - -nested join with analyzer and parallel replicas, both local -420000 420000 420000 -140000 -420042 420042 420042 -140014 -420084 420084 420084 -140028 -420126 420126 420126 -140042 -420168 420168 420168 -140056 -420210 420210 420210 -140070 -420252 420252 420252 -140084 -420294 420294 420294 -140098 -420336 420336 420336 -140112 -420378 420378 420378 -140126 -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4` ALL INNER JOIN (SELECT `__table6`.`number` * 7 AS `key` FROM numbers(100000.) AS `__table6`) AS `__table5` ON `__table4`.`key` = `__table5`.`key` SETTINGS parallel_replicas_prefer_local_join = 1) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4` ALL INNER JOIN (SELECT `__table6`.`number` * 7 AS `key` FROM numbers(100000.) AS `__table6`) AS `__table5` ON `__table4`.`key` = `__table5`.`key` SETTINGS parallel_replicas_prefer_local_join = 1) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) - WithOrderCoordinator: Coordination done - nested join with analyzer and parallel replicas, both global 420000 420000 420000 -140000 420042 420042 420042 -140014 diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.sh similarity index 58% rename from tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh rename to tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.sh index 2840482da6d..e49a340ab67 100755 --- a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.sh @@ -17,125 +17,6 @@ insert into num_1 select number * 2, toString(number * 2) from numbers(1e7); insert into num_2 select number * 3, -number from numbers(1.5e6); " -############## -echo -echo "simple join with analyzer" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2) r on l.key = r.key -order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1" - -############## -echo -echo "simple (global) join with analyzer and parallel replicas" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2) r on l.key = r.key -order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, -max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2) r on l.key = r.key -order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, send_logs_level='trace', -max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 | -grep "executeQuery\|.*Coordinator: Coordination done" | -grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | -sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' - -############## -echo -echo "simple (local) join with analyzer and parallel replicas" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2) r on l.key = r.key -order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1, -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2) r on l.key = r.key -order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | -grep "executeQuery\|.*Coordinator: Coordination done" | -grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | -sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' - - -############## -echo -echo "simple (local) join with analyzer and parallel replicas and full sorting merge join" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2) r on l.key = r.key -order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2) r on l.key = r.key -order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | -grep "executeQuery\|.*Coordinator: Coordination done" | -grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | -sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' - - -############## -echo -echo "nested join with analyzer" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2 inner join - (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r -on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1" - - -############## -echo -echo "nested join with analyzer and parallel replicas, both local" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2 inner join - (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r -on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2 inner join - (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r -on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | -grep "executeQuery\|.*Coordinator: Coordination done" | -grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | -sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' - - ############## echo echo "nested join with analyzer and parallel replicas, both global" diff --git a/tests/queries/0_stateless/02974_analyzer_array_join_subcolumn.sql b/tests/queries/0_stateless/02974_analyzer_array_join_subcolumn.sql index 14823644b96..30fb3c76c1f 100644 --- a/tests/queries/0_stateless/02974_analyzer_array_join_subcolumn.sql +++ b/tests/queries/0_stateless/02974_analyzer_array_join_subcolumn.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS t2; DROP TABLE IF EXISTS t3; CREATE TABLE t2 (id Int32, pe Map(String, Tuple(a UInt64, b UInt64))) ENGINE = MergeTree ORDER BY id; -INSERT INTO t2 VALUES (1, {'a': (1, 2), 'b': (2, 3)}), +INSERT INTO t2 VALUES (1, {'a': (1, 2), 'b': (2, 3)}),; CREATE TABLE t3 (id Int32, c Tuple(v String, pe Map(String, Tuple(a UInt64, b UInt64)))) ENGINE = MergeTree ORDER BY id; INSERT INTO t3 VALUES (1, ('A', {'a':(1, 2),'b':(2, 3)})); diff --git a/tests/queries/0_stateless/02982_aggregation_states_destruction.sh b/tests/queries/0_stateless/02982_aggregation_states_destruction.sh deleted file mode 100755 index 84183606d48..00000000000 --- a/tests/queries/0_stateless/02982_aggregation_states_destruction.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-random-settings - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - - -query_id="02982_$RANDOM" -$CLICKHOUSE_CLIENT --query_id $query_id --log_query_threads 1 --query="select number, uniq(number) from numbers_mt(1e7) group by number limit 100 format Null;" - -$CLICKHOUSE_CLIENT -q "system flush logs;" - -$CLICKHOUSE_CLIENT -q "select count() > 0 from system.query_thread_log where query_id = '$query_id' and current_database = currentDatabase() and thread_name = 'AggregDestruct';" diff --git a/tests/queries/0_stateless/02987_group_array_intersect.reference b/tests/queries/0_stateless/02987_group_array_intersect.reference index 7ec64a889f5..ec4d07742af 100644 --- a/tests/queries/0_stateless/02987_group_array_intersect.reference +++ b/tests/queries/0_stateless/02987_group_array_intersect.reference @@ -8,12 +8,12 @@ [1,4,5] [] [] -1000000 -999999 +100000 +99999 [9] ['a','c'] -1000000 -999999 +50000 +49999 ['1'] [] ['2023-01-01 00:00:00'] diff --git a/tests/queries/0_stateless/02987_group_array_intersect.sql b/tests/queries/0_stateless/02987_group_array_intersect.sql index 321e860b0a8..15acd0ca900 100644 --- a/tests/queries/0_stateless/02987_group_array_intersect.sql +++ b/tests/queries/0_stateless/02987_group_array_intersect.sql @@ -39,15 +39,15 @@ DROP TABLE test_numbers; DROP TABLE IF EXISTS test_big_numbers_sep; CREATE TABLE test_big_numbers_sep (a Array(Int64)) engine=MergeTree ORDER BY a; -INSERT INTO test_big_numbers_sep SELECT array(number) FROM numbers_mt(1000000); +INSERT INTO test_big_numbers_sep SELECT array(number) FROM numbers_mt(100000); SELECT groupArrayIntersect(*) FROM test_big_numbers_sep; DROP TABLE test_big_numbers_sep; DROP TABLE IF EXISTS test_big_numbers; CREATE TABLE test_big_numbers (a Array(Int64)) engine=MergeTree ORDER BY a; -INSERT INTO test_big_numbers SELECT range(1000000); +INSERT INTO test_big_numbers SELECT range(100000); SELECT length(groupArrayIntersect(*)) FROM test_big_numbers; -INSERT INTO test_big_numbers SELECT range(999999); +INSERT INTO test_big_numbers SELECT range(99999); SELECT length(groupArrayIntersect(*)) FROM test_big_numbers; INSERT INTO test_big_numbers VALUES ([9]); SELECT groupArrayIntersect(*) FROM test_big_numbers; @@ -63,9 +63,9 @@ DROP TABLE test_string; DROP TABLE IF EXISTS test_big_string; CREATE TABLE test_big_string (a Array(String)) engine=MergeTree ORDER BY a; -INSERT INTO test_big_string SELECT groupArray(toString(number)) FROM numbers_mt(1000000); +INSERT INTO test_big_string SELECT groupArray(toString(number)) FROM numbers_mt(50000); SELECT length(groupArrayIntersect(*)) FROM test_big_string; -INSERT INTO test_big_string SELECT groupArray(toString(number)) FROM numbers_mt(999999); +INSERT INTO test_big_string SELECT groupArray(toString(number)) FROM numbers_mt(49999); SELECT length(groupArrayIntersect(*)) FROM test_big_string; INSERT INTO test_big_string VALUES (['1']); SELECT groupArrayIntersect(*) FROM test_big_string; diff --git a/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql b/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql index 127c6fee07d..ad056384bfd 100644 --- a/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql +++ b/tests/queries/0_stateless/02992_all_columns_should_have_comment.sql @@ -1,4 +1,8 @@ SYSTEM FLUSH LOGS; SELECT 'Column ' || name || ' from table ' || concat(database, '.', table) || ' should have a comment' FROM system.columns -WHERE (database = 'system') AND (comment = '') AND (table NOT ILIKE '%_log_%') AND (table NOT IN ('numbers', 'numbers_mt', 'one', 'generate_series', 'generateSeries', 'coverage_log')) AND (default_kind != 'ALIAS'); +WHERE (database = 'system') AND + (comment = '') AND + (table NOT ILIKE '%log%') AND + (table NOT IN ('numbers', 'numbers_mt', 'one', 'generate_series', 'generateSeries', 'coverage_log', 'filesystem_read_prefetches_log')) AND + (default_kind != 'ALIAS'); diff --git a/tests/queries/0_stateless/02995_index.reference b/tests/queries/0_stateless/02995_index.reference deleted file mode 100644 index 1e8639caa88..00000000000 --- a/tests/queries/0_stateless/02995_index.reference +++ /dev/null @@ -1,126 +0,0 @@ -12 4 21722 2209341 4 1415 2333 4 61 64 3 -21 1134 11363 58484 1106 1458 1592 136 26 62 32 -22 210 4504 5729 196 291 767 124 47 54 8 -26 196 1327684 5221 195 4140 5661 161 28 49 19 -28 5 2034378 7102 5 325 3255 2 53 60 4 -29 53 45041 45189 45 1580 211 31 55 84 18 -38 424 1600675 4653 424 562 5944 244 60 65 6 -45 17 62743 674873 17 6239 6494 17 65 76 8 -72 1862 1210073 6200 1677 2498 528 859 51 61 11 -79 2 2255228 2255293 2 5495 7057 2 65 65 1 -85 459 1051571 1829638 459 6402 7131 334 32 61 25 -86 10 1748130 1754217 10 4374 7003 10 56 59 4 -91 165 5718 5802 75 282 7113 112 41 63 22 -94 20 1231916 2050003 20 4802 4917 19 53 59 7 -99 2 3665 36667 2 497 697 2 70 71 2 -103 1 2446615 2446615 1 2498 2498 1 58 58 1 -106 72 6149 6699 67 527 826 40 61 61 1 -111 43 2273186 5272 43 492 4923 4 54 72 15 -120 3129 45117 6735 2868 1030 1625 561 59 64 6 -138 2 49243 49374 2 1428 1519 2 47 48 2 -143 100 23321 63639 100 1115 1624 88 51 51 1 -145 1 2447976 2447976 1 6173 6173 1 44 44 1 -153 16 13748 16881 16 1506 1636 16 54 68 9 -159 19952 1525336 7131 12957 1280 6163 2668 24 66 39 -171 5 15042 16698 5 1302 1608 5 65 65 1 -179 6264 1362341 2686 6244 2554 7132 2705 61 67 7 -192 1 1639623 1639623 1 3406 3406 1 32 32 1 -193 1 1429969 1429969 1 7131 7131 1 45 45 1 -207 12 23057 32500 12 1491 1726 12 32 46 7 -221 5081 1366870 6649 3432 4527 5226 687 24 69 39 -228 73 12281 17929 71 1328 2034 63 49 71 18 -229 2 1617478 1723791 2 4590 5578 2 41 42 2 -230 3916 1332729 6949 3668 1330 4703 845 62 65 4 -238 25 2624456 2625673 24 2535 6465 25 58 75 14 -241 154 2554929 2616444 154 2626 7131 148 34 57 17 -248 276 15529 30987 274 1040 1222 136 37 79 27 -254 3018 33966 6635 2837 1057 1622 539 24 60 33 -255 20 1581774 1811334 20 6068 6301 18 33 57 10 -256 5 5145 6841 5 367 376 5 58 58 1 -270 2 2195579 2262119 2 7102 7123 2 33 34 2 -281 32 2379460 616215 32 6042 6086 23 53 64 12 -282 7 1292651 24244 7 1607 2455 6 46 55 5 -286 123 1521935 5269 123 3793 3940 81 40 66 22 -291 21 2419080 3567 21 297 4731 21 54 55 2 -316 4 5221 5616 4 505 558 4 32 35 3 -319 232 56480 63033 230 1599 313 50 33 64 26 -327 15 51647 51894 14 1292 1585 14 47 57 7 -332 24 23484 54948 24 1609 1726 16 32 49 11 -333 1 14189 14189 1 1550 1550 1 63 63 1 -342 49 2579220 2622432 49 4626 6933 48 34 54 14 -344 1 6486 6486 1 509 509 1 24 24 1 -346 1987 53016 6735 1823 1334 174 294 26 62 32 -358 45 59058 60844 44 6746 722 40 57 84 15 -363 1198 1260033 2568811 1196 5710 5790 82 55 80 26 -384 150 2361175 476024 150 7008 7123 81 38 64 22 -387 277 5200 6553 252 243 521 130 65 65 1 -392 1877 1607428 2030850 1875 1416 7131 1379 54 66 13 -396 8181 1380803 6186 7920 545 798 1743 24 67 39 -398 3 5183 5213 2 291 352 3 53 59 3 -399 62 51494 59203 61 7073 754 42 55 78 18 -412 2141 1360120 2189792 2136 2491 5658 1371 71 75 5 -413 2 2036037 2064917 2 3963 4666 2 43 45 2 -431 33 2302331 2348449 33 4425 6516 32 69 69 1 -447 59 25125 33094 59 1176 1817 56 53 58 6 -456 1 53157 53157 1 1556 1556 1 26 26 1 -462 5 5456 6280 5 348 4337 5 28 40 5 -472 1 1443716 1443716 1 6122 6122 1 42 42 1 -491 34 1066102 1183673 34 6606 6822 32 46 67 15 -498 896 2230163 3054 895 537 7131 714 24 59 28 -504 108 12281 25180 108 1318 1784 94 55 66 12 -515 22 1588883 2640809 22 6554 6571 15 46 59 12 -518 1 37743 37743 1 1558 1558 1 72 72 1 -530 1 3033 3033 1 561 561 1 59 59 1 -532 26 5721 6355 25 549 665 14 44 50 7 -546 156 2577874 48517 156 1105 324 133 44 51 8 -554 12 1665194 2640066 12 1817 2951 12 57 57 1 -564 3865 2028049 2083433 3722 1115 985 2203 44 84 41 -566 4432 50605 57509 3217 1191 267 459 26 72 39 -567 8 5221 5893 7 333 558 8 27 35 4 -582 1172 1320619 2019743 1172 5819 7131 757 26 63 30 -584 43100 2500 5594 22561 134 4573 1660 48 84 37 -589 28 6046 6068 19 345 564 27 55 62 8 -595 139 1585165 1683606 138 2231 3598 132 54 84 28 -615 3 1056081 1116230 3 5794 5796 2 59 62 3 -619 7 1543114 5241 7 2442 3105 7 41 45 3 -634 2722 1221058 4999 2686 2426 7131 1735 54 60 7 -635 237 2119333 4667 237 561 5999 176 49 60 12 -644 5 1774169 2056171 5 5591 6091 4 33 39 3 -647 8 51632 64403 8 1457 1624 8 26 34 5 -651 1325 1620565 6281 1301 528 792 815 62 63 2 -665 13 4598 4789 13 511 558 11 39 46 7 -679 1560 1613200 25940 1552 1569 3118 781 49 84 35 -704 2 14226 15594 2 1086 1116 2 65 71 2 -715 25 1199352 3490 25 5036 5112 23 34 55 13 -716 1253 61989 6735 1050 1203 1625 397 52 65 14 -730 2584 5560 6170 634 2421 627 293 56 69 14 -736 8 1433153 4941 8 339 4594 8 28 36 5 -749 2 1326176 1339862 2 4339 6213 2 49 50 2 -753 1 53157 53157 1 1556 1556 1 26 26 1 -761 63 1443230 6881 63 3154 3204 26 56 73 14 -762 49 1449596 1968154 49 2437 3753 48 54 62 9 -775 35107 5330 769436 2471 447 6607 656 70 81 12 -789 1 1552458 1552458 1 2441 2441 1 62 62 1 -794 158 5585 6585 155 495 929 67 24 50 20 -839 9 29223 46530 9 1336 1465 9 52 52 1 -844 5 2377545 2377635 5 5129 6321 5 53 69 5 -846 50 2172273 2589295 50 1582 3053 48 64 68 5 -847 2577 56656 63658 1582 1444 838 474 26 63 33 -861 1333 5570 6909 839 457 489 37 33 70 34 -873 2360 1519811 50487 2248 1310 1784 316 60 68 9 -879 228 6704 6785 79 279 507 121 35 66 24 -889 5130 2070007 39692 5040 1151 6791 2606 44 66 23 -896 4 511246 859452 4 6554 6561 4 67 71 4 -912 146 1322641 2238040 146 1366 6354 143 59 59 1 -913 82 5495 6870 78 350 565 67 24 43 15 -921 763 1580790 416881 763 6191 7131 509 63 64 2 -925 318 2500952 5025 309 476 6114 182 32 56 21 -931 12 4277 4809 12 238 256 9 63 83 9 -942 954 1331 2228193 952 1121 5047 788 65 70 6 -948 14 1785593 2600431 14 6550 6598 13 34 49 9 -956 5 5755 6023 5 359 411 5 43 48 4 -963 4 3812 3835 4 444 537 4 47 53 4 -978 5 51632 58212 5 1127 1556 5 24 32 5 -980 53 47201 59744 53 1537 1625 36 41 49 9 -987 6033 2020131 763444 4306 256 792 1832 60 64 5 -993 4 1615159 1718339 4 1570 3093 4 62 63 2 diff --git a/tests/queries/0_stateless/02995_index_1.reference b/tests/queries/0_stateless/02995_index_1.reference new file mode 100644 index 00000000000..6c3b1230db6 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_1.reference @@ -0,0 +1,15 @@ +12 4 21722 2209341 4 1415 2333 4 61 64 3 +21 1134 11363 58484 1106 1458 1592 136 26 62 32 +22 210 4504 5729 196 291 767 124 47 54 8 +26 196 1327684 5221 195 4140 5661 161 28 49 19 +28 5 2034378 7102 5 325 3255 2 53 60 4 +29 53 45041 45189 45 1580 211 31 55 84 18 +38 424 1600675 4653 424 562 5944 244 60 65 6 +45 17 62743 674873 17 6239 6494 17 65 76 8 +72 1862 1210073 6200 1677 2498 528 859 51 61 11 +79 2 2255228 2255293 2 5495 7057 2 65 65 1 +85 459 1051571 1829638 459 6402 7131 334 32 61 25 +86 10 1748130 1754217 10 4374 7003 10 56 59 4 +91 165 5718 5802 75 282 7113 112 41 63 22 +94 20 1231916 2050003 20 4802 4917 19 53 59 7 +99 2 3665 36667 2 497 697 2 70 71 2 diff --git a/tests/queries/0_stateless/02995_index.sh b/tests/queries/0_stateless/02995_index_1.sh similarity index 98% rename from tests/queries/0_stateless/02995_index.sh rename to tests/queries/0_stateless/02995_index_1.sh index 5125d03904e..a5f1b30c2e8 100755 --- a/tests/queries/0_stateless/02995_index.sh +++ b/tests/queries/0_stateless/02995_index_1.sh @@ -26,7 +26,7 @@ DETACH TABLE test; ATTACH TABLE test; " -for i in {1..1000} +for i in {1..100} do echo " WITH ${i} AS try diff --git a/tests/queries/0_stateless/02995_index_10.reference b/tests/queries/0_stateless/02995_index_10.reference new file mode 100644 index 00000000000..bfa38d03801 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_10.reference @@ -0,0 +1,13 @@ +912 146 1322641 2238040 146 1366 6354 143 59 59 1 +913 82 5495 6870 78 350 565 67 24 43 15 +921 763 1580790 416881 763 6191 7131 509 63 64 2 +925 318 2500952 5025 309 476 6114 182 32 56 21 +931 12 4277 4809 12 238 256 9 63 83 9 +942 954 1331 2228193 952 1121 5047 788 65 70 6 +948 14 1785593 2600431 14 6550 6598 13 34 49 9 +956 5 5755 6023 5 359 411 5 43 48 4 +963 4 3812 3835 4 444 537 4 47 53 4 +978 5 51632 58212 5 1127 1556 5 24 32 5 +980 53 47201 59744 53 1537 1625 36 41 49 9 +987 6033 2020131 763444 4306 256 792 1832 60 64 5 +993 4 1615159 1718339 4 1570 3093 4 62 63 2 diff --git a/tests/queries/0_stateless/02995_index_10.sh b/tests/queries/0_stateless/02995_index_10.sh new file mode 100755 index 00000000000..d72c7c72705 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_10.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {901..1000} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_2.reference b/tests/queries/0_stateless/02995_index_2.reference new file mode 100644 index 00000000000..098292a289f --- /dev/null +++ b/tests/queries/0_stateless/02995_index_2.reference @@ -0,0 +1,13 @@ +103 1 2446615 2446615 1 2498 2498 1 58 58 1 +106 72 6149 6699 67 527 826 40 61 61 1 +111 43 2273186 5272 43 492 4923 4 54 72 15 +120 3129 45117 6735 2868 1030 1625 561 59 64 6 +138 2 49243 49374 2 1428 1519 2 47 48 2 +143 100 23321 63639 100 1115 1624 88 51 51 1 +145 1 2447976 2447976 1 6173 6173 1 44 44 1 +153 16 13748 16881 16 1506 1636 16 54 68 9 +159 19952 1525336 7131 12957 1280 6163 2668 24 66 39 +171 5 15042 16698 5 1302 1608 5 65 65 1 +179 6264 1362341 2686 6244 2554 7132 2705 61 67 7 +192 1 1639623 1639623 1 3406 3406 1 32 32 1 +193 1 1429969 1429969 1 7131 7131 1 45 45 1 diff --git a/tests/queries/0_stateless/02995_index_2.sh b/tests/queries/0_stateless/02995_index_2.sh new file mode 100755 index 00000000000..e7451c7ee4b --- /dev/null +++ b/tests/queries/0_stateless/02995_index_2.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {101..200} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_3.reference b/tests/queries/0_stateless/02995_index_3.reference new file mode 100644 index 00000000000..9c2fca9fde6 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_3.reference @@ -0,0 +1,16 @@ +207 12 23057 32500 12 1491 1726 12 32 46 7 +221 5081 1366870 6649 3432 4527 5226 687 24 69 39 +228 73 12281 17929 71 1328 2034 63 49 71 18 +229 2 1617478 1723791 2 4590 5578 2 41 42 2 +230 3916 1332729 6949 3668 1330 4703 845 62 65 4 +238 25 2624456 2625673 24 2535 6465 25 58 75 14 +241 154 2554929 2616444 154 2626 7131 148 34 57 17 +248 276 15529 30987 274 1040 1222 136 37 79 27 +254 3018 33966 6635 2837 1057 1622 539 24 60 33 +255 20 1581774 1811334 20 6068 6301 18 33 57 10 +256 5 5145 6841 5 367 376 5 58 58 1 +270 2 2195579 2262119 2 7102 7123 2 33 34 2 +281 32 2379460 616215 32 6042 6086 23 53 64 12 +282 7 1292651 24244 7 1607 2455 6 46 55 5 +286 123 1521935 5269 123 3793 3940 81 40 66 22 +291 21 2419080 3567 21 297 4731 21 54 55 2 diff --git a/tests/queries/0_stateless/02995_index_3.sh b/tests/queries/0_stateless/02995_index_3.sh new file mode 100755 index 00000000000..506429e2696 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_3.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {201..300} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_4.reference b/tests/queries/0_stateless/02995_index_4.reference new file mode 100644 index 00000000000..deff7afaed3 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_4.reference @@ -0,0 +1,16 @@ +316 4 5221 5616 4 505 558 4 32 35 3 +319 232 56480 63033 230 1599 313 50 33 64 26 +327 15 51647 51894 14 1292 1585 14 47 57 7 +332 24 23484 54948 24 1609 1726 16 32 49 11 +333 1 14189 14189 1 1550 1550 1 63 63 1 +342 49 2579220 2622432 49 4626 6933 48 34 54 14 +344 1 6486 6486 1 509 509 1 24 24 1 +346 1987 53016 6735 1823 1334 174 294 26 62 32 +358 45 59058 60844 44 6746 722 40 57 84 15 +363 1198 1260033 2568811 1196 5710 5790 82 55 80 26 +384 150 2361175 476024 150 7008 7123 81 38 64 22 +387 277 5200 6553 252 243 521 130 65 65 1 +392 1877 1607428 2030850 1875 1416 7131 1379 54 66 13 +396 8181 1380803 6186 7920 545 798 1743 24 67 39 +398 3 5183 5213 2 291 352 3 53 59 3 +399 62 51494 59203 61 7073 754 42 55 78 18 diff --git a/tests/queries/0_stateless/02995_index_4.sh b/tests/queries/0_stateless/02995_index_4.sh new file mode 100755 index 00000000000..1a0458728f9 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_4.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {301..400} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_5.reference b/tests/queries/0_stateless/02995_index_5.reference new file mode 100644 index 00000000000..c5ab4d2417c --- /dev/null +++ b/tests/queries/0_stateless/02995_index_5.reference @@ -0,0 +1,9 @@ +412 2141 1360120 2189792 2136 2491 5658 1371 71 75 5 +413 2 2036037 2064917 2 3963 4666 2 43 45 2 +431 33 2302331 2348449 33 4425 6516 32 69 69 1 +447 59 25125 33094 59 1176 1817 56 53 58 6 +456 1 53157 53157 1 1556 1556 1 26 26 1 +462 5 5456 6280 5 348 4337 5 28 40 5 +472 1 1443716 1443716 1 6122 6122 1 42 42 1 +491 34 1066102 1183673 34 6606 6822 32 46 67 15 +498 896 2230163 3054 895 537 7131 714 24 59 28 diff --git a/tests/queries/0_stateless/02995_index_5.sh b/tests/queries/0_stateless/02995_index_5.sh new file mode 100755 index 00000000000..60c12a8146d --- /dev/null +++ b/tests/queries/0_stateless/02995_index_5.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {401..500} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_6.reference b/tests/queries/0_stateless/02995_index_6.reference new file mode 100644 index 00000000000..bac19179bb6 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_6.reference @@ -0,0 +1,14 @@ +504 108 12281 25180 108 1318 1784 94 55 66 12 +515 22 1588883 2640809 22 6554 6571 15 46 59 12 +518 1 37743 37743 1 1558 1558 1 72 72 1 +530 1 3033 3033 1 561 561 1 59 59 1 +532 26 5721 6355 25 549 665 14 44 50 7 +546 156 2577874 48517 156 1105 324 133 44 51 8 +554 12 1665194 2640066 12 1817 2951 12 57 57 1 +564 3865 2028049 2083433 3722 1115 985 2203 44 84 41 +566 4432 50605 57509 3217 1191 267 459 26 72 39 +567 8 5221 5893 7 333 558 8 27 35 4 +582 1172 1320619 2019743 1172 5819 7131 757 26 63 30 +584 43100 2500 5594 22561 134 4573 1660 48 84 37 +589 28 6046 6068 19 345 564 27 55 62 8 +595 139 1585165 1683606 138 2231 3598 132 54 84 28 diff --git a/tests/queries/0_stateless/02995_index_6.sh b/tests/queries/0_stateless/02995_index_6.sh new file mode 100755 index 00000000000..4936f73f36b --- /dev/null +++ b/tests/queries/0_stateless/02995_index_6.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {501..600} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_7.reference b/tests/queries/0_stateless/02995_index_7.reference new file mode 100644 index 00000000000..4f1d28ab37e --- /dev/null +++ b/tests/queries/0_stateless/02995_index_7.reference @@ -0,0 +1,9 @@ +615 3 1056081 1116230 3 5794 5796 2 59 62 3 +619 7 1543114 5241 7 2442 3105 7 41 45 3 +634 2722 1221058 4999 2686 2426 7131 1735 54 60 7 +635 237 2119333 4667 237 561 5999 176 49 60 12 +644 5 1774169 2056171 5 5591 6091 4 33 39 3 +647 8 51632 64403 8 1457 1624 8 26 34 5 +651 1325 1620565 6281 1301 528 792 815 62 63 2 +665 13 4598 4789 13 511 558 11 39 46 7 +679 1560 1613200 25940 1552 1569 3118 781 49 84 35 diff --git a/tests/queries/0_stateless/02995_index_7.sh b/tests/queries/0_stateless/02995_index_7.sh new file mode 100755 index 00000000000..26be310abce --- /dev/null +++ b/tests/queries/0_stateless/02995_index_7.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {601..700} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_8.reference b/tests/queries/0_stateless/02995_index_8.reference new file mode 100644 index 00000000000..62fbfb2af9a --- /dev/null +++ b/tests/queries/0_stateless/02995_index_8.reference @@ -0,0 +1,12 @@ +704 2 14226 15594 2 1086 1116 2 65 71 2 +715 25 1199352 3490 25 5036 5112 23 34 55 13 +716 1253 61989 6735 1050 1203 1625 397 52 65 14 +730 2584 5560 6170 634 2421 627 293 56 69 14 +736 8 1433153 4941 8 339 4594 8 28 36 5 +749 2 1326176 1339862 2 4339 6213 2 49 50 2 +753 1 53157 53157 1 1556 1556 1 26 26 1 +761 63 1443230 6881 63 3154 3204 26 56 73 14 +762 49 1449596 1968154 49 2437 3753 48 54 62 9 +775 35107 5330 769436 2471 447 6607 656 70 81 12 +789 1 1552458 1552458 1 2441 2441 1 62 62 1 +794 158 5585 6585 155 495 929 67 24 50 20 diff --git a/tests/queries/0_stateless/02995_index_8.sh b/tests/queries/0_stateless/02995_index_8.sh new file mode 100755 index 00000000000..8c2620b59fd --- /dev/null +++ b/tests/queries/0_stateless/02995_index_8.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {701..800} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_9.reference b/tests/queries/0_stateless/02995_index_9.reference new file mode 100644 index 00000000000..86c7be173bd --- /dev/null +++ b/tests/queries/0_stateless/02995_index_9.reference @@ -0,0 +1,9 @@ +839 9 29223 46530 9 1336 1465 9 52 52 1 +844 5 2377545 2377635 5 5129 6321 5 53 69 5 +846 50 2172273 2589295 50 1582 3053 48 64 68 5 +847 2577 56656 63658 1582 1444 838 474 26 63 33 +861 1333 5570 6909 839 457 489 37 33 70 34 +873 2360 1519811 50487 2248 1310 1784 316 60 68 9 +879 228 6704 6785 79 279 507 121 35 66 24 +889 5130 2070007 39692 5040 1151 6791 2606 44 66 23 +896 4 511246 859452 4 6554 6561 4 67 71 4 diff --git a/tests/queries/0_stateless/02995_index_9.sh b/tests/queries/0_stateless/02995_index_9.sh new file mode 100755 index 00000000000..76160c62aaa --- /dev/null +++ b/tests/queries/0_stateless/02995_index_9.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {801..900} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.reference b/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.reference deleted file mode 100644 index b19d389d8d0..00000000000 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.reference +++ /dev/null @@ -1,10 +0,0 @@ -before -rmt_master NewPart 0 1 -rmt_master MergeParts 0 1 -rmt_slave MergeParts 1 0 -rmt_slave DownloadPart 0 1 -after -rmt_master NewPart 0 1 -rmt_master MergeParts 0 1 -rmt_slave MergeParts 1 0 -rmt_slave DownloadPart 0 2 diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sql b/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sql deleted file mode 100644 index 548a8e5570a..00000000000 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sql +++ /dev/null @@ -1,35 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-shared-merge-tree --- SMT: The merge process is completely different from RMT - -drop table if exists rmt_master; -drop table if exists rmt_slave; - -create table rmt_master (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'master') order by key settings always_fetch_merged_part=0; --- always_fetch_merged_part=1, consider this table as a "slave" -create table rmt_slave (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'slave') order by key settings always_fetch_merged_part=1; - -insert into rmt_master values (1); - -system sync replica rmt_master; -system sync replica rmt_slave; -system stop replicated sends rmt_master; -optimize table rmt_master final settings alter_sync=1, optimize_throw_if_noop=1; - -select sleep(3) format Null; - -system flush logs; -select 'before'; -select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; - -system start replicated sends rmt_master; --- sleep few seconds to try rmt_slave to fetch the part and reflect this error --- in system.part_log -select sleep(3) format Null; -system sync replica rmt_slave; - -system flush logs; -select 'after'; -select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; - -drop table rmt_master; -drop table rmt_slave; diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.reference b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.reference deleted file mode 100644 index aac9e7527d1..00000000000 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.reference +++ /dev/null @@ -1,10 +0,0 @@ -before -rmt_master NewPart 0 1 -rmt_master MutatePart 0 1 -rmt_slave DownloadPart 0 1 -rmt_slave MutatePart 1 0 -after -rmt_master NewPart 0 1 -rmt_master MutatePart 0 1 -rmt_slave DownloadPart 0 2 -rmt_slave MutatePart 1 0 diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql deleted file mode 100644 index d8b5ebb3148..00000000000 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql +++ /dev/null @@ -1,41 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-shared-merge-tree --- SMT: The merge process is completely different from RMT - -drop table if exists rmt_master; -drop table if exists rmt_slave; - -create table rmt_master (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'master') order by tuple() settings always_fetch_merged_part=0, old_parts_lifetime=600; --- prefer_fetch_merged_part_*_threshold=0, consider this table as a "slave" -create table rmt_slave (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'slave') order by tuple() settings prefer_fetch_merged_part_time_threshold=0, prefer_fetch_merged_part_size_threshold=0, old_parts_lifetime=600; - -insert into rmt_master values (1); - -system sync replica rmt_master; -system sync replica rmt_slave; -system stop replicated sends rmt_master; -system stop pulling replication log rmt_slave; -alter table rmt_master update key=key+100 where 1 settings alter_sync=1; - --- first we need to make the rmt_master execute mutation so that it will have --- the part, and rmt_slave will consider it instead of performing mutation on --- it's own, otherwise prefer_fetch_merged_part_*_threshold will be simply ignored -select sleep(3) format Null; -system start pulling replication log rmt_slave; --- and sleep few more seconds to try rmt_slave to fetch the part and reflect --- this error in system.part_log -select sleep(3) format Null; - -system flush logs; -select 'before'; -select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; - -system start replicated sends rmt_master; -select sleep(3) format Null; -system sync replica rmt_slave; - -system flush logs; -select 'after'; -select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; - -drop table rmt_master; -drop table rmt_slave; diff --git a/tests/queries/0_stateless/03005_input_function_in_join.sql b/tests/queries/0_stateless/03005_input_function_in_join.sql index 8a6b4a48a8d..a6fc27cd8d0 100644 --- a/tests/queries/0_stateless/03005_input_function_in_join.sql +++ b/tests/queries/0_stateless/03005_input_function_in_join.sql @@ -8,7 +8,7 @@ SELECT x.number FROM ( ) AS x INNER JOIN input('a UInt64') AS y ON x.number = y.a Format CSV 2 -; + select * from test; drop table test; diff --git a/tests/queries/0_stateless/03013_json_key_ignore_case.sh b/tests/queries/0_stateless/03013_json_key_ignore_case.sh index 807e743b22a..7882a29ae4f 100755 --- a/tests/queries/0_stateless/03013_json_key_ignore_case.sh +++ b/tests/queries/0_stateless/03013_json_key_ignore_case.sh @@ -13,6 +13,6 @@ cp "$CURDIR"/data_json/key_ignore_case.json $USER_FILES_PATH/ $CLICKHOUSE_CLIENT -q "drop table if exists test_tbl" $CLICKHOUSE_CLIENT -q "create table test_tbl (id UInt16, reqid UInt32, name String) engine=MergeTree order by id" -$CLICKHOUSE_CLIENT -q "INSERT INTO test_tbl SELECT * FROM file('key_ignore_case.json', 'JSONEachRow') SETTINGS input_format_json_ignore_key_case=true" +$CLICKHOUSE_CLIENT -q "INSERT INTO test_tbl SELECT * FROM file('key_ignore_case.json', 'JSONEachRow') SETTINGS input_format_json_case_insensitive_column_matching=true" $CLICKHOUSE_CLIENT -q "select * from test_tbl" -$CLICKHOUSE_CLIENT -q "drop table test_tbl" \ No newline at end of file +$CLICKHOUSE_CLIENT -q "drop table test_tbl" diff --git a/tests/queries/0_stateless/02982_aggregation_states_destruction.reference b/tests/queries/0_stateless/03023_invalid_format_detection.reference similarity index 100% rename from tests/queries/0_stateless/02982_aggregation_states_destruction.reference rename to tests/queries/0_stateless/03023_invalid_format_detection.reference diff --git a/tests/queries/0_stateless/03023_invalid_format_detection.sh b/tests/queries/0_stateless/03023_invalid_format_detection.sh new file mode 100755 index 00000000000..984a42aa218 --- /dev/null +++ b/tests/queries/0_stateless/03023_invalid_format_detection.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +touch $CLICKHOUSE_TEST_UNIQUE_NAME.xml +$CLICKHOUSE_LOCAL -q "select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.*')" 2>&1 | grep -c "CANNOT_DETECT_FORMAT" +rm $CLICKHOUSE_TEST_UNIQUE_NAME.xml + diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference deleted file mode 100644 index 202e4557a33..00000000000 --- a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference +++ /dev/null @@ -1,2 +0,0 @@ -query -String diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql deleted file mode 100644 index b26096f7f0e..00000000000 --- a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql +++ /dev/null @@ -1,18 +0,0 @@ - -SELECT * FROM fuzzQuery('SELECT 1', 500, 8956) LIMIT 0 FORMAT TSVWithNamesAndTypes; - -SELECT * FROM fuzzQuery('SELECT * -FROM ( - SELECT - ([toString(number % 2)] :: Array(LowCardinality(String))) AS item_id, - count() - FROM numbers(3) - GROUP BY item_id WITH TOTALS -) AS l FULL JOIN ( - SELECT - ([toString((number % 2) * 2)] :: Array(String)) AS item_id - FROM numbers(3) -) AS r -ON l.item_id = r.item_id -ORDER BY 1,2,3; -', 500, 8956) LIMIT 10 FORMAT NULL; diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.lib similarity index 79% rename from tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh rename to tests/queries/0_stateless/03036_dynamic_read_subcolumns.lib index 65517061b99..4914051db82 100755 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.lib @@ -1,14 +1,4 @@ #!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" - function test() { @@ -43,20 +33,3 @@ function test() $CH_CLIENT -q "select d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64, d.\`Array(Dynamic)\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64 from test format Null" $CH_CLIENT -q "select d.\`Array(Array(Dynamic))\`.size1, d.\`Array(Array(Dynamic))\`.UInt64, d.\`Array(Array(Dynamic))\`.\`Map(String, Tuple(a UInt64))\`.values.a from test format Null" } - -$CH_CLIENT -q "drop table if exists test;" - -echo "Memory" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=Memory" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree compact" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" -test -$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.reference b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.reference deleted file mode 100644 index 36984bc8b9b..00000000000 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.reference +++ /dev/null @@ -1,57 +0,0 @@ -Memory -test -Array(Array(Dynamic)) -Array(Variant(String, UInt64)) -None -String -UInt64 -200000 -200000 -200000 -200000 -0 -0 -200000 -200000 -100000 -100000 -200000 -0 -MergeTree compact -test -Array(Array(Dynamic)) -Array(Variant(String, UInt64)) -None -String -UInt64 -200000 -200000 -200000 -200000 -0 -0 -200000 -200000 -100000 -100000 -200000 -0 -MergeTree wide -test -Array(Array(Dynamic)) -Array(Variant(String, UInt64)) -None -String -UInt64 -200000 -200000 -200000 -200000 -0 -0 -200000 -200000 -100000 -100000 -200000 -0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_1.reference b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_1.reference new file mode 100644 index 00000000000..0d51ecfac3b --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_1.reference @@ -0,0 +1,19 @@ +Memory +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +200000 +200000 +200000 +200000 +0 +0 +200000 +200000 +100000 +100000 +200000 +0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_1.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_1.sh new file mode 100755 index 00000000000..ed548ae74e9 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_1.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + + +# shellcheck source=./03036_dynamic_read_subcolumns.lib +. "$CUR_DIR"/03036_dynamic_read_subcolumns.lib + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=Memory" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_2.reference b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_2.reference new file mode 100644 index 00000000000..099b7574566 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_2.reference @@ -0,0 +1,19 @@ +MergeTree compact +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +200000 +200000 +200000 +200000 +0 +0 +200000 +200000 +100000 +100000 +200000 +0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_2.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_2.sh new file mode 100755 index 00000000000..95dafcf5832 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_2.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + + +# shellcheck source=./03036_dynamic_read_subcolumns.lib +. "$CUR_DIR"/03036_dynamic_read_subcolumns.lib + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_3.reference b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_3.reference new file mode 100644 index 00000000000..35db4a22b4c --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_3.reference @@ -0,0 +1,19 @@ +MergeTree wide +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +200000 +200000 +200000 +200000 +0 +0 +200000 +200000 +100000 +100000 +200000 +0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_3.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_3.sh new file mode 100755 index 00000000000..a3c2d93e568 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_3.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + + +# shellcheck source=./03036_dynamic_read_subcolumns.lib +. "$CUR_DIR"/03036_dynamic_read_subcolumns.lib + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.reference b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.reference new file mode 100644 index 00000000000..d75d75896f7 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.reference @@ -0,0 +1,17 @@ +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +200000 +200000 +200000 +200000 +0 +0 +200000 +200000 +100000 +100000 +200000 +0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql new file mode 100644 index 00000000000..66fbf006a8c --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql @@ -0,0 +1,40 @@ +-- Tags: long + +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000; + +insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000; +insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(400000, 400000) settings min_insert_block_size_rows=50000; +insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; + +select distinct dynamicType(d) as type from test order by type; +select count() from test where dynamicType(d) == 'UInt64'; +select count() from test where d.UInt64 is not NULL; +select count() from test where dynamicType(d) == 'String'; +select count() from test where d.String is not NULL; +select count() from test where dynamicType(d) == 'Date'; +select count() from test where d.Date is not NULL; +select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'; +select count() from test where not empty(d.`Array(Variant(String, UInt64))`); +select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'; +select count() from test where not empty(d.`Array(Array(Dynamic))`); +select count() from test where d is NULL; +select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String); + +select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.Int8, d.Date, d.`Array(String)` from test format Null; +select d, d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test format Null; +select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test format Null; + +drop table test; diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_memory.reference b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_memory.reference new file mode 100644 index 00000000000..d75d75896f7 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_memory.reference @@ -0,0 +1,17 @@ +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +200000 +200000 +200000 +200000 +0 +0 +200000 +200000 +100000 +100000 +200000 +0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_memory.sql b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_memory.sql new file mode 100644 index 00000000000..bb03bdef704 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_memory.sql @@ -0,0 +1,40 @@ +-- Tags: long + +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (id UInt64, d Dynamic) engine=Memory; + +insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000; +insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(400000, 400000) settings min_insert_block_size_rows=50000; +insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; + +select distinct dynamicType(d) as type from test order by type; +select count() from test where dynamicType(d) == 'UInt64'; +select count() from test where d.UInt64 is not NULL; +select count() from test where dynamicType(d) == 'String'; +select count() from test where d.String is not NULL; +select count() from test where dynamicType(d) == 'Date'; +select count() from test where d.Date is not NULL; +select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'; +select count() from test where not empty(d.`Array(Variant(String, UInt64))`); +select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'; +select count() from test where not empty(d.`Array(Array(Dynamic))`); +select count() from test where d is NULL; +select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String); + +select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.Int8, d.Date, d.`Array(String)` from test format Null; +select d, d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test format Null; +select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test format Null; + +drop table test; diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.reference b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.reference new file mode 100644 index 00000000000..d75d75896f7 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.reference @@ -0,0 +1,17 @@ +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +200000 +200000 +200000 +200000 +0 +0 +200000 +200000 +100000 +100000 +200000 +0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql new file mode 100644 index 00000000000..00aba3a57b6 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql @@ -0,0 +1,40 @@ +-- Tags: long + +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; + +insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000; +insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(400000, 400000) settings min_insert_block_size_rows=50000; +insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; + +select distinct dynamicType(d) as type from test order by type; +select count() from test where dynamicType(d) == 'UInt64'; +select count() from test where d.UInt64 is not NULL; +select count() from test where dynamicType(d) == 'String'; +select count() from test where d.String is not NULL; +select count() from test where dynamicType(d) == 'Date'; +select count() from test where d.Date is not NULL; +select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'; +select count() from test where not empty(d.`Array(Variant(String, UInt64))`); +select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'; +select count() from test where not empty(d.`Array(Array(Dynamic))`); +select count() from test where d is NULL; +select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String); + +select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.Int8, d.Date, d.`Array(String)` from test format Null; +select d, d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test format Null; +select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test format Null; + +drop table test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.reference deleted file mode 100644 index 59297e46330..00000000000 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.reference +++ /dev/null @@ -1,60 +0,0 @@ -MergeTree compact -test -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String -MergeTree wide -test -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh deleted file mode 100755 index 887b2ed94d7..00000000000 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" - -function test() -{ - echo "test" - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, number from numbers(100000)" - $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(80000)" - $CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(70000)" - $CH_CLIENT -q "insert into test select number, toDate(number) from numbers(60000)" - $CH_CLIENT -q "insert into test select number, toDateTime(number) from numbers(50000)" - $CH_CLIENT -q "insert into test select number, NULL from numbers(100000)" - - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, map(number, number) from numbers(200000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, tuple(number, number) from numbers(10000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" -} - -$CH_CLIENT -q "drop table if exists test;" - -echo "MergeTree compact" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;" -test -$CH_CLIENT -q "drop table test;" - diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.reference new file mode 100644 index 00000000000..d0d777a5a38 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.reference @@ -0,0 +1,28 @@ +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql new file mode 100644 index 00000000000..07371ee099b --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql @@ -0,0 +1,33 @@ +-- Tags: long +set allow_experimental_dynamic_type=1; + +drop table if exists test; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; + +system stop merges test; +insert into test select number, number from numbers(100000); +insert into test select number, 'str_' || toString(number) from numbers(80000); +insert into test select number, range(number % 10 + 1) from numbers(70000); +insert into test select number, toDate(number) from numbers(60000); +insert into test select number, toDateTime(number) from numbers(50000); +insert into test select number, NULL from numbers(100000); + +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; optimize table test final;; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, map(number, number) from numbers(200000); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, tuple(number, number) from numbers(10000); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.reference new file mode 100644 index 00000000000..d0d777a5a38 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.reference @@ -0,0 +1,28 @@ +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql new file mode 100644 index 00000000000..2b55a31e937 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql @@ -0,0 +1,33 @@ +-- Tags: long +set allow_experimental_dynamic_type=1; + +drop table if exists test; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; + +system stop merges test; +insert into test select number, number from numbers(100000); +insert into test select number, 'str_' || toString(number) from numbers(80000); +insert into test select number, range(number % 10 + 1) from numbers(70000); +insert into test select number, toDate(number) from numbers(60000); +insert into test select number, toDateTime(number) from numbers(50000); +insert into test select number, NULL from numbers(100000); + +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; optimize table test final;; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, map(number, number) from numbers(200000); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, tuple(number, number) from numbers(10000); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.reference deleted file mode 100644 index 59297e46330..00000000000 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.reference +++ /dev/null @@ -1,60 +0,0 @@ -MergeTree compact -test -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String -MergeTree wide -test -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh deleted file mode 100755 index 371ae87c2ef..00000000000 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - - - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" -function test() -{ - echo "test" - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, number from numbers(100000)" - $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(80000)" - $CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(70000)" - $CH_CLIENT -q "insert into test select number, toDate(number) from numbers(60000)" - $CH_CLIENT -q "insert into test select number, toDateTime(number) from numbers(50000)" - $CH_CLIENT -q "insert into test select number, NULL from numbers(100000)" - - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, map(number, number) from numbers(200000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, tuple(number, number) from numbers(10000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" -} - -$CH_CLIENT -q "drop table if exists test;" - -echo "MergeTree compact" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;" -test -$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.reference new file mode 100644 index 00000000000..d0d777a5a38 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.reference @@ -0,0 +1,28 @@ +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql new file mode 100644 index 00000000000..ea7295a9eab --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql @@ -0,0 +1,34 @@ +-- Tags: long +set allow_experimental_dynamic_type=1; + +drop table if exists test; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; + +system stop merges test; +insert into test select number, number from numbers(100000); +insert into test select number, 'str_' || toString(number) from numbers(80000); +insert into test select number, range(number % 10 + 1) from numbers(70000); +insert into test select number, toDate(number) from numbers(60000); +insert into test select number, toDateTime(number) from numbers(50000); +insert into test select number, NULL from numbers(100000); + +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, map(number, number) from numbers(200000); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, tuple(number, number) from numbers(10000); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.reference new file mode 100644 index 00000000000..d0d777a5a38 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.reference @@ -0,0 +1,28 @@ +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql new file mode 100644 index 00000000000..e888a14b323 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql @@ -0,0 +1,33 @@ +-- Tags: long +set allow_experimental_dynamic_type=1; + +drop table if exists test; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; + +system stop merges test; +insert into test select number, number from numbers(100000); +insert into test select number, 'str_' || toString(number) from numbers(80000); +insert into test select number, range(number % 10 + 1) from numbers(70000); +insert into test select number, toDate(number) from numbers(60000); +insert into test select number, toDateTime(number) from numbers(50000); +insert into test select number, NULL from numbers(100000); + +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; optimize table test final;; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, map(number, number) from numbers(200000); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, tuple(number, number) from numbers(10000); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2.reference b/tests/queries/0_stateless/03037_dynamic_merges_2.reference deleted file mode 100644 index 420b8185b16..00000000000 --- a/tests/queries/0_stateless/03037_dynamic_merges_2.reference +++ /dev/null @@ -1,20 +0,0 @@ -MergeTree compact + horizontal merge -test -1000000 Array(UInt16) -1000000 String -1000000 UInt64 -MergeTree wide + horizontal merge -test -1000000 Array(UInt16) -1000000 String -1000000 UInt64 -MergeTree compact + vertical merge -test -1000000 Array(UInt16) -1000000 String -1000000 UInt64 -MergeTree wide + vertical merge -test -1000000 Array(UInt16) -1000000 String -1000000 UInt64 diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2.sh b/tests/queries/0_stateless/03037_dynamic_merges_2.sh deleted file mode 100755 index 40adbdd4262..00000000000 --- a/tests/queries/0_stateless/03037_dynamic_merges_2.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" - - -function test() -{ - echo "test" - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, number from numbers(1000000)" - $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(1000000, 1000000)" - $CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(2000000, 1000000)" - - $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" -} - -$CH_CLIENT -q "drop table if exists test;" - -echo "MergeTree compact + horizontal merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide + horizontal merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree compact + vertical merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide + vertical merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" -test -$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.reference new file mode 100644 index 00000000000..afd392002e5 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.reference @@ -0,0 +1,3 @@ +1000000 Array(UInt16) +1000000 String +1000000 UInt64 diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql new file mode 100644 index 00000000000..e633b277ebd --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql @@ -0,0 +1,14 @@ +-- Tags: long + +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, lock_acquire_timeout_for_background_operations=600; +system stop merges test; +insert into test select number, number from numbers(1000000); +insert into test select number, 'str_' || toString(number) from numbers(1000000, 1000000); +insert into test select number, range(number % 10 + 1) from numbers(2000000, 1000000); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +drop table test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.reference new file mode 100644 index 00000000000..afd392002e5 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.reference @@ -0,0 +1,3 @@ +1000000 Array(UInt16) +1000000 String +1000000 UInt64 diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql new file mode 100644 index 00000000000..90dbc2d84f5 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql @@ -0,0 +1,14 @@ +-- Tags: long + +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, lock_acquire_timeout_for_background_operations=600; +system stop merges test; +insert into test select number, number from numbers(1000000); +insert into test select number, 'str_' || toString(number) from numbers(1000000, 1000000); +insert into test select number, range(number % 10 + 1) from numbers(2000000, 1000000); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +drop table test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.reference new file mode 100644 index 00000000000..afd392002e5 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.reference @@ -0,0 +1,3 @@ +1000000 Array(UInt16) +1000000 String +1000000 UInt64 diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql new file mode 100644 index 00000000000..ffd2618ee51 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql @@ -0,0 +1,14 @@ +-- Tags: long + +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, lock_acquire_timeout_for_background_operations=600; +system stop merges test; +insert into test select number, number from numbers(1000000); +insert into test select number, 'str_' || toString(number) from numbers(1000000, 1000000); +insert into test select number, range(number % 10 + 1) from numbers(2000000, 1000000); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +drop table test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.reference new file mode 100644 index 00000000000..afd392002e5 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.reference @@ -0,0 +1,3 @@ +1000000 Array(UInt16) +1000000 String +1000000 UInt64 diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql new file mode 100644 index 00000000000..36dff88751b --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql @@ -0,0 +1,14 @@ +-- Tags: long + +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, lock_acquire_timeout_for_background_operations=600; +system stop merges test; +insert into test select number, number from numbers(1000000); +insert into test select number, 'str_' || toString(number) from numbers(1000000, 1000000); +insert into test select number, range(number % 10 + 1) from numbers(2000000, 1000000); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +drop table test; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges.reference deleted file mode 100644 index 65034647775..00000000000 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges.reference +++ /dev/null @@ -1,92 +0,0 @@ -MergeTree compact + horizontal merge -test -16667 Tuple(a Dynamic(max_types=3)):Date -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):String -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None -16667 Tuple(a Dynamic(max_types=3)):DateTime -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -133333 Tuple(a Dynamic(max_types=3)):None -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -116667 Tuple(a Dynamic(max_types=3)):String -133333 Tuple(a Dynamic(max_types=3)):None -MergeTree wide + horizontal merge -test -16667 Tuple(a Dynamic(max_types=3)):Date -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):String -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None -16667 Tuple(a Dynamic(max_types=3)):DateTime -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -133333 Tuple(a Dynamic(max_types=3)):None -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -116667 Tuple(a Dynamic(max_types=3)):String -133333 Tuple(a Dynamic(max_types=3)):None -MergeTree compact + vertical merge -test -16667 Tuple(a Dynamic(max_types=3)):Date -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):String -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None -16667 Tuple(a Dynamic(max_types=3)):DateTime -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -133333 Tuple(a Dynamic(max_types=3)):None -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -116667 Tuple(a Dynamic(max_types=3)):String -133333 Tuple(a Dynamic(max_types=3)):None -MergeTree wide + vertical merge -test -16667 Tuple(a Dynamic(max_types=3)):Date -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):String -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None -16667 Tuple(a Dynamic(max_types=3)):DateTime -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -133333 Tuple(a Dynamic(max_types=3)):None -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -116667 Tuple(a Dynamic(max_types=3)):String -133333 Tuple(a Dynamic(max_types=3)):None diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh deleted file mode 100755 index 5d8eac082cf..00000000000 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0" - - -function test() -{ - echo "test" - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, number from numbers(100000)" - $CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)" - $CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" - - $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" - $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" - - $CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" - $CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)" - - $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" - $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" -} - -$CH_CLIENT -q "drop table if exists test;" - -echo "MergeTree compact + horizontal merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide + horizontal merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree compact + vertical merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide + vertical merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" -test -$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.reference new file mode 100644 index 00000000000..4be740f6050 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.reference @@ -0,0 +1,21 @@ +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql new file mode 100644 index 00000000000..1d5c63dcdf1 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql @@ -0,0 +1,29 @@ +-- Tags: long + +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; +set enable_named_columns_in_function_tuple = 0; + +drop table if exists test;; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, lock_acquire_timeout_for_background_operations=600; + +system stop merges test; +insert into test select number, number from numbers(100000); +insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000); +insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000); + +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +system start merges test; +optimize table test final; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; + +insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000); +insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000); + +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +system start merges test; +optimize table test final; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; + +drop table test; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.reference new file mode 100644 index 00000000000..4be740f6050 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.reference @@ -0,0 +1,21 @@ +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql new file mode 100644 index 00000000000..2bffe35c577 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql @@ -0,0 +1,29 @@ +-- Tags: long + +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; +set enable_named_columns_in_function_tuple = 0; + +drop table if exists test;; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, lock_acquire_timeout_for_background_operations=600; + +system stop merges test; +insert into test select number, number from numbers(100000); +insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000); +insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000); + +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +system start merges test; +optimize table test final; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; + +insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000); +insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000); + +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +system start merges test; +optimize table test final; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; + +drop table test; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.reference new file mode 100644 index 00000000000..4be740f6050 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.reference @@ -0,0 +1,21 @@ +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql new file mode 100644 index 00000000000..fb686091ebb --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql @@ -0,0 +1,29 @@ +-- Tags: long + +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; +set enable_named_columns_in_function_tuple = 0; + +drop table if exists test;; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, lock_acquire_timeout_for_background_operations=600; + +system stop merges test; +insert into test select number, number from numbers(100000); +insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000); +insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000); + +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +system start merges test; +optimize table test final; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; + +insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000); +insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000); + +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +system start merges test; +optimize table test final; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; + +drop table test; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.reference new file mode 100644 index 00000000000..4be740f6050 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.reference @@ -0,0 +1,21 @@ +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql new file mode 100644 index 00000000000..ed195452d56 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql @@ -0,0 +1,29 @@ +-- Tags: long + +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; +set enable_named_columns_in_function_tuple = 0; + +drop table if exists test;; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, lock_acquire_timeout_for_background_operations=600; + +system stop merges test; +insert into test select number, number from numbers(100000); +insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000); +insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000); + +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +system start merges test; +optimize table test final; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; + +insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000); +insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000); + +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +system start merges test; +optimize table test final; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; + +drop table test; diff --git a/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh index b8760ec0e1d..9ea86105a3a 100755 --- a/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh +++ b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh @@ -2,8 +2,6 @@ # Tags: long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.sh index 881c9ec64cc..9a2a6dd957c 100755 --- a/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.sh +++ b/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.sh @@ -2,8 +2,6 @@ # Tags: long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.sh index fc9039ac98c..0199035a3df 100755 --- a/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.sh +++ b/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.sh @@ -2,8 +2,6 @@ # Tags: long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.sh index f9da70e95ca..e2ea5bc3466 100755 --- a/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.sh +++ b/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.sh @@ -2,8 +2,6 @@ # Tags: long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh index 60248f4453a..43607cf95a8 100755 --- a/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh +++ b/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh @@ -2,8 +2,6 @@ # Tags: long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference deleted file mode 100644 index a9c785d1e48..00000000000 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference +++ /dev/null @@ -1,526 +0,0 @@ -Memory -initial insert -alter add column 1 -3 None -0 0 \N \N \N 0 -1 1 \N \N \N 0 -2 2 \N \N \N 0 -insert after alter add column 1 -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 \N 3 \N 0 -4 4 4 \N 4 \N 0 -5 5 5 \N 5 \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 \N 12 \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -alter modify column 1 -7 None -8 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -insert after alter modify column 1 -8 None -11 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -alter modify column 2 -4 UInt64 -7 String -8 None -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 \N 3 \N 0 -4 4 4 \N 4 \N 0 -5 5 5 \N 5 \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 \N 12 \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -insert after alter modify column 2 -1 Date -5 UInt64 -8 String -9 None -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 \N 3 \N 0 -4 4 4 \N 4 \N 0 -5 5 5 \N 5 \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 \N 12 \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -19 19 \N \N \N \N 0 -20 20 20 \N 20 \N 0 -21 21 str_21 str_21 \N \N 0 -22 22 1970-01-23 \N \N 1970-01-23 0 -alter modify column 3 -1 Date -5 UInt64 -8 String -9 None -0 0 0 \N 0 \N \N \N 0 -1 1 1 \N 0 \N \N \N 0 -2 2 2 \N 0 \N \N \N 0 -3 3 3 \N 0 \N 3 \N 0 -4 4 4 \N 0 \N 4 \N 0 -5 5 5 \N 0 \N 5 \N 0 -6 6 6 \N 0 str_6 \N \N 0 -7 7 7 \N 0 str_7 \N \N 0 -8 8 8 \N 0 str_8 \N \N 0 -9 9 9 \N 0 \N \N \N 0 -10 10 10 \N 0 \N \N \N 0 -11 11 11 \N 0 \N \N \N 0 -12 12 12 \N 0 \N 12 \N 0 -13 13 13 \N 0 str_13 \N \N 0 -14 14 14 \N 0 \N \N \N 0 -15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 -17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 -19 19 19 \N 0 \N \N \N 0 -20 20 20 \N 0 \N 20 \N 0 -21 21 21 \N 0 str_21 \N \N 0 -22 22 22 \N 0 \N \N 1970-01-23 0 -insert after alter modify column 3 -1 Date -5 UInt64 -8 String -12 None -0 0 0 \N 0 \N \N \N 0 -1 1 1 \N 0 \N \N \N 0 -2 2 2 \N 0 \N \N \N 0 -3 3 3 \N 0 \N 3 \N 0 -4 4 4 \N 0 \N 4 \N 0 -5 5 5 \N 0 \N 5 \N 0 -6 6 6 \N 0 str_6 \N \N 0 -7 7 7 \N 0 str_7 \N \N 0 -8 8 8 \N 0 str_8 \N \N 0 -9 9 9 \N 0 \N \N \N 0 -10 10 10 \N 0 \N \N \N 0 -11 11 11 \N 0 \N \N \N 0 -12 12 12 \N 0 \N 12 \N 0 -13 13 13 \N 0 str_13 \N \N 0 -14 14 14 \N 0 \N \N \N 0 -15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 -17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 -19 19 19 \N 0 \N \N \N 0 -20 20 20 \N 0 \N 20 \N 0 -21 21 21 \N 0 str_21 \N \N 0 -22 22 22 \N 0 \N \N 1970-01-23 0 -23 \N \N \N 0 \N \N \N 0 -24 24 24 \N 0 \N \N \N 0 -25 str_25 \N str_25 0 \N \N \N 0 -MergeTree compact -initial insert -alter add column 1 -3 None -0 0 \N \N \N 0 -1 1 \N \N \N 0 -2 2 \N \N \N 0 -insert after alter add column 1 -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 \N 3 \N 0 -4 4 4 \N 4 \N 0 -5 5 5 \N 5 \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 \N 12 \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -alter modify column 1 -7 None -8 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -insert after alter modify column 1 -8 None -11 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -alter modify column 2 -8 None -11 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -insert after alter modify column 2 -1 Date -1 UInt64 -9 None -12 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -19 19 \N \N \N \N 0 -20 20 20 \N 20 \N 0 -21 21 str_21 str_21 \N \N 0 -22 22 1970-01-23 \N \N 1970-01-23 0 -alter modify column 3 -1 Date -1 UInt64 -9 None -12 String -0 0 0 \N 0 \N \N \N 0 -1 1 1 \N 0 \N \N \N 0 -2 2 2 \N 0 \N \N \N 0 -3 3 3 \N 0 3 \N \N 0 -4 4 4 \N 0 4 \N \N 0 -5 5 5 \N 0 5 \N \N 0 -6 6 6 \N 0 str_6 \N \N 0 -7 7 7 \N 0 str_7 \N \N 0 -8 8 8 \N 0 str_8 \N \N 0 -9 9 9 \N 0 \N \N \N 0 -10 10 10 \N 0 \N \N \N 0 -11 11 11 \N 0 \N \N \N 0 -12 12 12 \N 0 12 \N \N 0 -13 13 13 \N 0 str_13 \N \N 0 -14 14 14 \N 0 \N \N \N 0 -15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 -17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 -19 19 19 \N 0 \N \N \N 0 -20 20 20 \N 0 \N 20 \N 0 -21 21 21 \N 0 str_21 \N \N 0 -22 22 22 \N 0 \N \N 1970-01-23 0 -insert after alter modify column 3 -1 Date -1 UInt64 -12 None -12 String -0 0 0 \N 0 \N \N \N 0 -1 1 1 \N 0 \N \N \N 0 -2 2 2 \N 0 \N \N \N 0 -3 3 3 \N 0 3 \N \N 0 -4 4 4 \N 0 4 \N \N 0 -5 5 5 \N 0 5 \N \N 0 -6 6 6 \N 0 str_6 \N \N 0 -7 7 7 \N 0 str_7 \N \N 0 -8 8 8 \N 0 str_8 \N \N 0 -9 9 9 \N 0 \N \N \N 0 -10 10 10 \N 0 \N \N \N 0 -11 11 11 \N 0 \N \N \N 0 -12 12 12 \N 0 12 \N \N 0 -13 13 13 \N 0 str_13 \N \N 0 -14 14 14 \N 0 \N \N \N 0 -15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 -17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 -19 19 19 \N 0 \N \N \N 0 -20 20 20 \N 0 \N 20 \N 0 -21 21 21 \N 0 str_21 \N \N 0 -22 22 22 \N 0 \N \N 1970-01-23 0 -23 \N \N \N 0 \N \N \N 0 -24 24 24 \N 0 \N \N \N 0 -25 str_25 \N str_25 0 \N \N \N 0 -MergeTree wide -initial insert -alter add column 1 -3 None -0 0 \N \N \N 0 -1 1 \N \N \N 0 -2 2 \N \N \N 0 -insert after alter add column 1 -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 \N 3 \N 0 -4 4 4 \N 4 \N 0 -5 5 5 \N 5 \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 \N 12 \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -alter modify column 1 -7 None -8 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -insert after alter modify column 1 -8 None -11 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -alter modify column 2 -8 None -11 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -insert after alter modify column 2 -1 Date -1 UInt64 -9 None -12 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -19 19 \N \N \N \N 0 -20 20 20 \N 20 \N 0 -21 21 str_21 str_21 \N \N 0 -22 22 1970-01-23 \N \N 1970-01-23 0 -alter modify column 3 -1 Date -1 UInt64 -9 None -12 String -0 0 0 \N 0 \N \N \N 0 -1 1 1 \N 0 \N \N \N 0 -2 2 2 \N 0 \N \N \N 0 -3 3 3 \N 0 3 \N \N 0 -4 4 4 \N 0 4 \N \N 0 -5 5 5 \N 0 5 \N \N 0 -6 6 6 \N 0 str_6 \N \N 0 -7 7 7 \N 0 str_7 \N \N 0 -8 8 8 \N 0 str_8 \N \N 0 -9 9 9 \N 0 \N \N \N 0 -10 10 10 \N 0 \N \N \N 0 -11 11 11 \N 0 \N \N \N 0 -12 12 12 \N 0 12 \N \N 0 -13 13 13 \N 0 str_13 \N \N 0 -14 14 14 \N 0 \N \N \N 0 -15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 -17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 -19 19 19 \N 0 \N \N \N 0 -20 20 20 \N 0 \N 20 \N 0 -21 21 21 \N 0 str_21 \N \N 0 -22 22 22 \N 0 \N \N 1970-01-23 0 -insert after alter modify column 3 -1 Date -1 UInt64 -12 None -12 String -0 0 0 \N 0 \N \N \N 0 -1 1 1 \N 0 \N \N \N 0 -2 2 2 \N 0 \N \N \N 0 -3 3 3 \N 0 3 \N \N 0 -4 4 4 \N 0 4 \N \N 0 -5 5 5 \N 0 5 \N \N 0 -6 6 6 \N 0 str_6 \N \N 0 -7 7 7 \N 0 str_7 \N \N 0 -8 8 8 \N 0 str_8 \N \N 0 -9 9 9 \N 0 \N \N \N 0 -10 10 10 \N 0 \N \N \N 0 -11 11 11 \N 0 \N \N \N 0 -12 12 12 \N 0 12 \N \N 0 -13 13 13 \N 0 str_13 \N \N 0 -14 14 14 \N 0 \N \N \N 0 -15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 -17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 -19 19 19 \N 0 \N \N \N 0 -20 20 20 \N 0 \N 20 \N 0 -21 21 21 \N 0 str_21 \N \N 0 -22 22 22 \N 0 \N \N 1970-01-23 0 -23 \N \N \N 0 \N \N \N 0 -24 24 24 \N 0 \N \N \N 0 -25 str_25 \N str_25 0 \N \N \N 0 diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh deleted file mode 100755 index 1f2a6a31ad7..00000000000 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1" - -function run() -{ - echo "initial insert" - $CH_CLIENT -q "insert into test select number, number from numbers(3)" - - echo "alter add column 1" - $CH_CLIENT -q "alter table test add column d Dynamic(max_types=3) settings mutations_sync=1" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.\`Tuple(a UInt64)\`.a from test order by x" - - echo "insert after alter add column 1" - $CH_CLIENT -q "insert into test select number, number, number from numbers(3, 3)" - $CH_CLIENT -q "insert into test select number, number, 'str_' || toString(number) from numbers(6, 3)" - $CH_CLIENT -q "insert into test select number, number, NULL from numbers(9, 3)" - $CH_CLIENT -q "insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" - - echo "alter modify column 1" - $CH_CLIENT -q "alter table test modify column d Dynamic(max_types=1) settings mutations_sync=1" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" - - echo "insert after alter modify column 1" - $CH_CLIENT -q "insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(15, 4)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" - - echo "alter modify column 2" - $CH_CLIENT -q "alter table test modify column d Dynamic(max_types=3) settings mutations_sync=1" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" - - echo "insert after alter modify column 2" - $CH_CLIENT -q "insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(19, 4)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" - - echo "alter modify column 3" - $CH_CLIENT -q "alter table test modify column y Dynamic settings mutations_sync=1" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select x, y, y.UInt64, y.String, y.\`Tuple(a UInt64)\`.a, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" - - echo "insert after alter modify column 3" - $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL), NULL from numbers(23, 3)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select x, y, y.UInt64, y.String, y.\`Tuple(a UInt64)\`.a, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" -} - -$CH_CLIENT -q "drop table if exists test;" - -echo "Memory" -$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=Memory" -run -$CH_CLIENT -q "drop table test;" - -echo "MergeTree compact" -$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" -run -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide" -$CH_CLIENT -q "create table test (x UInt64, y UInt64 ) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" -run -$CH_CLIENT -q "drop table test;" - diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.reference new file mode 100644 index 00000000000..2ec301b747b --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.reference @@ -0,0 +1,174 @@ +initial insert +alter add column 1 +3 None +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +alter modify column 1 +7 None +8 String +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +insert after alter modify column 1 +8 None +11 String +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +alter modify column 2 +8 None +11 String +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +insert after alter modify column 2 +1 Date +1 UInt64 +9 None +12 String +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +19 19 \N \N \N \N 0 +20 20 20 \N 20 \N 0 +21 21 str_21 str_21 \N \N 0 +22 22 1970-01-23 \N \N 1970-01-23 0 +alter modify column 3 +1 Date +1 UInt64 +9 None +12 String +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +insert after alter modify column 3 +1 Date +1 UInt64 +12 None +12 String +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +23 \N \N \N 0 \N \N \N 0 +24 24 24 \N 0 \N \N \N 0 +25 str_25 \N str_25 0 \N \N \N 0 diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.sql b/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.sql new file mode 100644 index 00000000000..4ab700306d4 --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.sql @@ -0,0 +1,53 @@ +set allow_experimental_dynamic_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; + +drop table if exists test; +create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000; +select 'initial insert'; +insert into test select number, number from numbers(3); + +select 'alter add column 1'; +alter table test add column d Dynamic(max_types=3) settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter add column 1'; +insert into test select number, number, number from numbers(3, 3); +insert into test select number, number, 'str_' || toString(number) from numbers(6, 3); +insert into test select number, number, NULL from numbers(9, 3); +insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'alter modify column 1'; +alter table test modify column d Dynamic(max_types=1) settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter modify column 1'; +insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(15, 4); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'alter modify column 2'; +alter table test modify column d Dynamic(max_types=3) settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter modify column 2'; +insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(19, 4); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'alter modify column 3'; +alter table test modify column y Dynamic settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, y.UInt64, y.String, y.`Tuple(a UInt64)`.a, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter modify column 3'; +insert into test select number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL), NULL from numbers(23, 3); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, y.UInt64, y.String, y.`Tuple(a UInt64)`.a, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.reference new file mode 100644 index 00000000000..c592528c3cd --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.reference @@ -0,0 +1,175 @@ +initial insert +alter add column 1 +3 None +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +alter modify column 1 +7 None +8 String +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +insert after alter modify column 1 +8 None +11 String +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +alter modify column 2 +4 UInt64 +7 String +8 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +insert after alter modify column 2 +1 Date +5 UInt64 +8 String +9 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +19 19 \N \N \N \N 0 +20 20 20 \N 20 \N 0 +21 21 str_21 str_21 \N \N 0 +22 22 1970-01-23 \N \N 1970-01-23 0 +alter modify column 3 +1 Date +5 UInt64 +8 String +9 None +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 0 \N 5 \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 \N 12 \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +insert after alter modify column 3 +1 Date +5 UInt64 +8 String +12 None +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 0 \N 5 \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 \N 12 \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +23 \N \N \N 0 \N \N \N 0 +24 24 24 \N 0 \N \N \N 0 +25 str_25 \N str_25 0 \N \N \N 0 diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.sql b/tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.sql new file mode 100644 index 00000000000..e802fd034ce --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.sql @@ -0,0 +1,53 @@ +set allow_experimental_dynamic_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; + +drop table if exists test; +create table test (x UInt64, y UInt64) engine=Memory; +select 'initial insert'; +insert into test select number, number from numbers(3); + +select 'alter add column 1'; +alter table test add column d Dynamic(max_types=3) settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter add column 1'; +insert into test select number, number, number from numbers(3, 3); +insert into test select number, number, 'str_' || toString(number) from numbers(6, 3); +insert into test select number, number, NULL from numbers(9, 3); +insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'alter modify column 1'; +alter table test modify column d Dynamic(max_types=1) settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter modify column 1'; +insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(15, 4); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'alter modify column 2'; +alter table test modify column d Dynamic(max_types=3) settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter modify column 2'; +insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(19, 4); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'alter modify column 3'; +alter table test modify column y Dynamic settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, y.UInt64, y.String, y.`Tuple(a UInt64)`.a, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter modify column 3'; +insert into test select number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL), NULL from numbers(23, 3); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, y.UInt64, y.String, y.`Tuple(a UInt64)`.a, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.reference new file mode 100644 index 00000000000..2ec301b747b --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.reference @@ -0,0 +1,174 @@ +initial insert +alter add column 1 +3 None +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +alter modify column 1 +7 None +8 String +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +insert after alter modify column 1 +8 None +11 String +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +alter modify column 2 +8 None +11 String +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +insert after alter modify column 2 +1 Date +1 UInt64 +9 None +12 String +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +19 19 \N \N \N \N 0 +20 20 20 \N 20 \N 0 +21 21 str_21 str_21 \N \N 0 +22 22 1970-01-23 \N \N 1970-01-23 0 +alter modify column 3 +1 Date +1 UInt64 +9 None +12 String +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +insert after alter modify column 3 +1 Date +1 UInt64 +12 None +12 String +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +23 \N \N \N 0 \N \N \N 0 +24 24 24 \N 0 \N \N \N 0 +25 str_25 \N str_25 0 \N \N \N 0 diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.sql b/tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.sql new file mode 100644 index 00000000000..55c4f0b5f0c --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.sql @@ -0,0 +1,53 @@ +set allow_experimental_dynamic_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; + +drop table if exists test; +create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; +select 'initial insert'; +insert into test select number, number from numbers(3); + +select 'alter add column 1'; +alter table test add column d Dynamic(max_types=3) settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter add column 1'; +insert into test select number, number, number from numbers(3, 3); +insert into test select number, number, 'str_' || toString(number) from numbers(6, 3); +insert into test select number, number, NULL from numbers(9, 3); +insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'alter modify column 1'; +alter table test modify column d Dynamic(max_types=1) settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter modify column 1'; +insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(15, 4); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'alter modify column 2'; +alter table test modify column d Dynamic(max_types=3) settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter modify column 2'; +insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(19, 4); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'alter modify column 3'; +alter table test modify column y Dynamic settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, y.UInt64, y.String, y.`Tuple(a UInt64)`.a, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter modify column 3'; +insert into test select number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL), NULL from numbers(23, 3); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, y.UInt64, y.String, y.`Tuple(a UInt64)`.a, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference deleted file mode 100644 index f7c00bd8c44..00000000000 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference +++ /dev/null @@ -1,182 +0,0 @@ -MergeTree compact -initial insert -alter add column -3 None -0 0 \N \N \N 0 -1 1 \N \N \N 0 -2 2 \N \N \N 0 -insert after alter add column 1 -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 \N 3 \N 0 -4 4 4 \N 4 \N 0 -5 5 5 \N 5 \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 \N 12 \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -alter rename column 1 -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 \N 3 \N 0 -4 4 4 \N 4 \N 0 -5 5 5 \N 5 \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 \N 12 \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -insert nested dynamic -3 Array(Dynamic) -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 [] [] [] -1 1 \N \N \N \N 0 [] [] [] -2 2 \N \N \N \N 0 [] [] [] -3 3 3 \N 3 \N 0 [] [] [] -4 4 4 \N 4 \N 0 [] [] [] -5 5 5 \N 5 \N 0 [] [] [] -6 6 str_6 str_6 \N \N 0 [] [] [] -7 7 str_7 str_7 \N \N 0 [] [] [] -8 8 str_8 str_8 \N \N 0 [] [] [] -9 9 \N \N \N \N 0 [] [] [] -10 10 \N \N \N \N 0 [] [] [] -11 11 \N \N \N \N 0 [] [] [] -12 12 12 \N 12 \N 0 [] [] [] -13 13 str_13 str_13 \N \N 0 [] [] [] -14 14 \N \N \N \N 0 [] [] [] -15 15 [15] \N \N \N 0 [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N 0 [17] [NULL] [NULL] -alter rename column 2 -3 Array(Dynamic) -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 [] [] [] -1 1 \N \N \N \N 0 [] [] [] -2 2 \N \N \N \N 0 [] [] [] -3 3 3 \N 3 \N 0 [] [] [] -4 4 4 \N 4 \N 0 [] [] [] -5 5 5 \N 5 \N 0 [] [] [] -6 6 str_6 str_6 \N \N 0 [] [] [] -7 7 str_7 str_7 \N \N 0 [] [] [] -8 8 str_8 str_8 \N \N 0 [] [] [] -9 9 \N \N \N \N 0 [] [] [] -10 10 \N \N \N \N 0 [] [] [] -11 11 \N \N \N \N 0 [] [] [] -12 12 12 \N 12 \N 0 [] [] [] -13 13 str_13 str_13 \N \N 0 [] [] [] -14 14 \N \N \N \N 0 [] [] [] -15 15 [15] \N \N \N 0 [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N 0 [17] [NULL] [NULL] -MergeTree wide -initial insert -alter add column -3 None -0 0 \N \N \N 0 -1 1 \N \N \N 0 -2 2 \N \N \N 0 -insert after alter add column 1 -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 \N 3 \N 0 -4 4 4 \N 4 \N 0 -5 5 5 \N 5 \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 \N 12 \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -alter rename column 1 -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 \N 3 \N 0 -4 4 4 \N 4 \N 0 -5 5 5 \N 5 \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 \N 12 \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -insert nested dynamic -3 Array(Dynamic) -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 [] [] [] -1 1 \N \N \N \N 0 [] [] [] -2 2 \N \N \N \N 0 [] [] [] -3 3 3 \N 3 \N 0 [] [] [] -4 4 4 \N 4 \N 0 [] [] [] -5 5 5 \N 5 \N 0 [] [] [] -6 6 str_6 str_6 \N \N 0 [] [] [] -7 7 str_7 str_7 \N \N 0 [] [] [] -8 8 str_8 str_8 \N \N 0 [] [] [] -9 9 \N \N \N \N 0 [] [] [] -10 10 \N \N \N \N 0 [] [] [] -11 11 \N \N \N \N 0 [] [] [] -12 12 12 \N 12 \N 0 [] [] [] -13 13 str_13 str_13 \N \N 0 [] [] [] -14 14 \N \N \N \N 0 [] [] [] -15 15 [15] \N \N \N 0 [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N 0 [17] [NULL] [NULL] -alter rename column 2 -3 Array(Dynamic) -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 [] [] [] -1 1 \N \N \N \N 0 [] [] [] -2 2 \N \N \N \N 0 [] [] [] -3 3 3 \N 3 \N 0 [] [] [] -4 4 4 \N 4 \N 0 [] [] [] -5 5 5 \N 5 \N 0 [] [] [] -6 6 str_6 str_6 \N \N 0 [] [] [] -7 7 str_7 str_7 \N \N 0 [] [] [] -8 8 str_8 str_8 \N \N 0 [] [] [] -9 9 \N \N \N \N 0 [] [] [] -10 10 \N \N \N \N 0 [] [] [] -11 11 \N \N \N \N 0 [] [] [] -12 12 12 \N 12 \N 0 [] [] [] -13 13 str_13 str_13 \N \N 0 [] [] [] -14 14 \N \N \N \N 0 [] [] [] -15 15 [15] \N \N \N 0 [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N 0 [17] [NULL] [NULL] diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2.sh b/tests/queries/0_stateless/03040_dynamic_type_alters_2.sh deleted file mode 100755 index 6491e64372f..00000000000 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_2.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1" - -function run() -{ - echo "initial insert" - $CH_CLIENT -q "insert into test select number, number from numbers(3)" - - echo "alter add column" - $CH_CLIENT -q "alter table test add column d Dynamic settings mutations_sync=1" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.\`Tuple(a UInt64)\`.a from test order by x" - - echo "insert after alter add column 1" - $CH_CLIENT -q "insert into test select number, number, number from numbers(3, 3)" - $CH_CLIENT -q "insert into test select number, number, 'str_' || toString(number) from numbers(6, 3)" - $CH_CLIENT -q "insert into test select number, number, NULL from numbers(9, 3)" - $CH_CLIENT -q "insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" - - echo "alter rename column 1" - $CH_CLIENT -q "alter table test rename column d to d1 settings mutations_sync=1" - $CH_CLIENT -q "select count(), dynamicType(d1) from test group by dynamicType(d1) order by count(), dynamicType(d1)" - $CH_CLIENT -q "select x, y, d1, d1.String, d1.UInt64, d1.Date, d1.\`Tuple(a UInt64)\`.a from test order by x" - - echo "insert nested dynamic" - $CH_CLIENT -q "insert into test select number, number, [number % 2 ? number : 'str_' || toString(number)]::Array(Dynamic) from numbers(15, 3)" - $CH_CLIENT -q "select count(), dynamicType(d1) from test group by dynamicType(d1) order by count(), dynamicType(d1)" - $CH_CLIENT -q "select x, y, d1, d1.String, d1.UInt64, d1.Date, d1.\`Tuple(a UInt64)\`.a, d1.\`Array(Dynamic)\`.UInt64, d1.\`Array(Dynamic)\`.String, d1.\`Array(Dynamic)\`.Date from test order by x" - - echo "alter rename column 2" - $CH_CLIENT -q "alter table test rename column d1 to d2 settings mutations_sync=1" - $CH_CLIENT -q "select count(), dynamicType(d2) from test group by dynamicType(d2) order by count(), dynamicType(d2)" - $CH_CLIENT -q "select x, y, d2, d2.String, d2.UInt64, d2.Date, d2.\`Tuple(a UInt64)\`.a, d2.\`Array(Dynamic)\`.UInt64, d2.\`Array(Dynamic)\`.String, d2.\`Array(Dynamic)\`.Date, from test order by x" -} - -$CH_CLIENT -q "drop table if exists test;" - -echo "MergeTree compact" -$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" -run -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide" -$CH_CLIENT -q "create table test (x UInt64, y UInt64 ) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" -run -$CH_CLIENT -q "drop table test;" - diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2_compact_merge_tree.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_2_compact_merge_tree.reference new file mode 100644 index 00000000000..a2f2a19805d --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_2_compact_merge_tree.reference @@ -0,0 +1,90 @@ +initial insert +alter add column +3 None +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +alter rename column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +insert nested dynamic +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] +alter rename column 2 +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2_compact_merge_tree.sql b/tests/queries/0_stateless/03040_dynamic_type_alters_2_compact_merge_tree.sql new file mode 100644 index 00000000000..cead110dd7d --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_2_compact_merge_tree.sql @@ -0,0 +1,39 @@ +set allow_experimental_dynamic_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; + +drop table if exists test; +create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000; + +select 'initial insert'; +insert into test select number, number from numbers(3); + +select 'alter add column'; +alter table test add column d Dynamic settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter add column 1'; +insert into test select number, number, number from numbers(3, 3); +insert into test select number, number, 'str_' || toString(number) from numbers(6, 3); +insert into test select number, number, NULL from numbers(9, 3); +insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'alter rename column 1'; +alter table test rename column d to d1 settings mutations_sync=1; +select count(), dynamicType(d1) from test group by dynamicType(d1) order by count(), dynamicType(d1); +select x, y, d1, d1.String, d1.UInt64, d1.Date, d1.`Tuple(a UInt64)`.a from test order by x; + +select 'insert nested dynamic'; +insert into test select number, number, [number % 2 ? number : 'str_' || toString(number)]::Array(Dynamic) from numbers(15, 3); +select count(), dynamicType(d1) from test group by dynamicType(d1) order by count(), dynamicType(d1); +select x, y, d1, d1.String, d1.UInt64, d1.Date, d1.`Tuple(a UInt64)`.a, d1.`Array(Dynamic)`.UInt64, d1.`Array(Dynamic)`.String, d1.`Array(Dynamic)`.Date from test order by x; + +select 'alter rename column 2'; +alter table test rename column d1 to d2 settings mutations_sync=1; +select count(), dynamicType(d2) from test group by dynamicType(d2) order by count(), dynamicType(d2); +select x, y, d2, d2.String, d2.UInt64, d2.Date, d2.`Tuple(a UInt64)`.a, d2.`Array(Dynamic)`.UInt64, d2.`Array(Dynamic)`.String, d2.`Array(Dynamic)`.Date, from test order by x; + +drop table test; diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2_wide_merge_tree.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_2_wide_merge_tree.reference new file mode 100644 index 00000000000..a2f2a19805d --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_2_wide_merge_tree.reference @@ -0,0 +1,90 @@ +initial insert +alter add column +3 None +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +alter rename column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +insert nested dynamic +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] +alter rename column 2 +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2_wide_merge_tree.sql b/tests/queries/0_stateless/03040_dynamic_type_alters_2_wide_merge_tree.sql new file mode 100644 index 00000000000..f58599b1d61 --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_2_wide_merge_tree.sql @@ -0,0 +1,39 @@ +set allow_experimental_dynamic_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; + +drop table if exists test; +create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; + +select 'initial insert'; +insert into test select number, number from numbers(3); + +select 'alter add column'; +alter table test add column d Dynamic settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter add column 1'; +insert into test select number, number, number from numbers(3, 3); +insert into test select number, number, 'str_' || toString(number) from numbers(6, 3); +insert into test select number, number, NULL from numbers(9, 3); +insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'alter rename column 1'; +alter table test rename column d to d1 settings mutations_sync=1; +select count(), dynamicType(d1) from test group by dynamicType(d1) order by count(), dynamicType(d1); +select x, y, d1, d1.String, d1.UInt64, d1.Date, d1.`Tuple(a UInt64)`.a from test order by x; + +select 'insert nested dynamic'; +insert into test select number, number, [number % 2 ? number : 'str_' || toString(number)]::Array(Dynamic) from numbers(15, 3); +select count(), dynamicType(d1) from test group by dynamicType(d1) order by count(), dynamicType(d1); +select x, y, d1, d1.String, d1.UInt64, d1.Date, d1.`Tuple(a UInt64)`.a, d1.`Array(Dynamic)`.UInt64, d1.`Array(Dynamic)`.String, d1.`Array(Dynamic)`.Date from test order by x; + +select 'alter rename column 2'; +alter table test rename column d1 to d2 settings mutations_sync=1; +select count(), dynamicType(d2) from test group by dynamicType(d2) order by count(), dynamicType(d2); +select x, y, d2, d2.String, d2.UInt64, d2.Date, d2.`Tuple(a UInt64)`.a, d2.`Array(Dynamic)`.UInt64, d2.`Array(Dynamic)`.String, d2.`Array(Dynamic)`.Date, from test order by x; + +drop table test; diff --git a/tests/queries/0_stateless/03041_dynamic_type_check_table.sh b/tests/queries/0_stateless/03041_dynamic_type_check_table.sh index 3d802485be3..c8bd533e253 100755 --- a/tests/queries/0_stateless/03041_dynamic_type_check_table.sh +++ b/tests/queries/0_stateless/03041_dynamic_type_check_table.sh @@ -2,8 +2,6 @@ # Tags: long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh @@ -42,4 +40,3 @@ echo "MergeTree wide" $CH_CLIENT -q "create table test (x UInt64, y UInt64 ) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" run $CH_CLIENT -q "drop table test;" - diff --git a/tests/queries/0_stateless/03131_hilbert_coding.sql b/tests/queries/0_stateless/03131_hilbert_coding.sql index ed293dc6910..b16a0efad5d 100644 --- a/tests/queries/0_stateless/03131_hilbert_coding.sql +++ b/tests/queries/0_stateless/03131_hilbert_coding.sql @@ -46,6 +46,7 @@ drop table if exists hilbert_numbers_1_03131; select '----- ERRORS -----'; select hilbertEncode(); -- { serverError TOO_FEW_ARGUMENTS_FOR_FUNCTION } +select hilbertEncode(1, 2, 3); -- { serverError TOO_MANY_ARGUMENTS_FOR_FUNCTION } select hilbertDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } select hilbertEncode('text'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } select hilbertDecode('text', 'text'); -- { serverError ILLEGAL_COLUMN } diff --git a/tests/queries/0_stateless/03143_asof_join_ddb_long.reference b/tests/queries/0_stateless/03143_asof_join_ddb_long.reference new file mode 100644 index 00000000000..2850a8aba98 --- /dev/null +++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.reference @@ -0,0 +1,2 @@ +49999983751397 10000032 +49999983751397 10000032 diff --git a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql new file mode 100644 index 00000000000..17a67511030 --- /dev/null +++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql @@ -0,0 +1,50 @@ +-- Tags: long + +DROP TABLE IF EXISTS build; +DROP TABLE IF EXISTS skewed_probe; + +SET session_timezone = 'UTC'; + +CREATE TABLE build ENGINE = MergeTree ORDER BY (key, begin) +AS + SELECT + toDateTime('1990-03-21 13:00:00') + INTERVAL number MINUTE AS begin, + number % 4 AS key, + number AS value + FROM numbers(0, 10000000); + +CREATE TABLE skewed_probe ENGINE = MergeTree ORDER BY (key, begin) +AS + SELECT + toDateTime('1990-04-21 13:00:01') + INTERVAL number MINUTE AS begin, + 0 AS key + FROM numbers(0, 5) + UNION ALL + SELECT + toDateTime('1990-05-21 13:00:01') + INTERVAL number MINUTE AS begin, + 1 AS key + FROM numbers(0, 10) + UNION ALL + SELECT + toDateTime('1990-06-21 13:00:01') + INTERVAL number MINUTE AS begin, + 2 AS key + FROM numbers(0, 20) + UNION ALL + SELECT + toDateTime('1990-03-21 13:00:01') + INTERVAL number MINUTE AS begin, + 3 AS key + FROM numbers(0, 10000000); + + +SELECT SUM(value), COUNT(*) +FROM skewed_probe +ASOF JOIN build +USING (key, begin) +; + +SELECT SUM(value), COUNT(*) +FROM skewed_probe +ASOF JOIN build +USING (key, begin) +SETTINGS join_algorithm = 'full_sorting_merge' +; diff --git a/tests/queries/0_stateless/03144_asof_join_ddb_doubles.reference b/tests/queries/0_stateless/03144_asof_join_ddb_doubles.reference new file mode 100644 index 00000000000..f130f0a3f3b --- /dev/null +++ b/tests/queries/0_stateless/03144_asof_join_ddb_doubles.reference @@ -0,0 +1,58 @@ +1 0 +2 0 +3 1 +4 1 +5 1 +6 2 +7 2 +8 3 +9 3 +0 0 +1 0 +2 0 +3 1 +4 1 +5 1 +6 2 +7 2 +8 3 +9 3 +1 1 0 +1 2 0 +1 3 1 +1 4 1 +1 5 1 +1 6 2 +1 7 2 +1 8 3 +1 9 3 +2 0 10 +2 1 10 +2 2 10 +2 3 10 +2 4 10 +2 5 10 +2 6 10 +2 7 20 +2 8 20 +2 9 20 +1 0 0 +1 1 0 +1 2 0 +1 3 1 +1 4 1 +1 5 1 +1 6 2 +1 7 2 +1 8 3 +1 9 3 +2 0 10 +2 1 10 +2 2 10 +2 3 10 +2 4 10 +2 5 10 +2 6 10 +2 7 20 +2 8 20 +2 9 20 diff --git a/tests/queries/0_stateless/03144_asof_join_ddb_doubles.sql b/tests/queries/0_stateless/03144_asof_join_ddb_doubles.sql new file mode 100644 index 00000000000..87aece14628 --- /dev/null +++ b/tests/queries/0_stateless/03144_asof_join_ddb_doubles.sql @@ -0,0 +1,65 @@ +SET join_algorithm = 'full_sorting_merge'; +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS events0; + +CREATE TABLE events0 ( + begin Float64, + value Int32 +) ENGINE = MergeTree ORDER BY begin; + +INSERT INTO events0 VALUES (1.0, 0), (3.0, 1), (6.0, 2), (8.0, 3); + +SELECT p.ts, e.value +FROM + (SELECT number :: Float64 AS ts FROM numbers(10)) p +ASOF JOIN events0 e +ON p.ts >= e.begin +ORDER BY p.ts ASC; + +SELECT p.ts, e.value +FROM + (SELECT number :: Float64 AS ts FROM numbers(10)) p +ASOF LEFT JOIN events0 e +ON p.ts >= e.begin +ORDER BY p.ts ASC +-- SETTINGS join_use_nulls = 1 +; + +DROP TABLE IF EXISTS events0; + +DROP TABLE IF EXISTS events; +DROP TABLE IF EXISTS probes; + +CREATE TABLE events ( + key Int32, + begin Float64, + value Int32 +) ENGINE = MergeTree ORDER BY (key, begin); + +INSERT INTO events VALUES (1, 1.0, 0), (1, 3.0, 1), (1, 6.0, 2), (1, 8.0, 3), (2, 0.0, 10), (2, 7.0, 20), (2, 11.0, 30); + +CREATE TABLE probes ( + key Int32, + ts Float64 +) ENGINE = MergeTree ORDER BY (key, ts) AS +SELECT + key.number, + ts.number +FROM + numbers(1, 2) as key, + numbers(10) as ts +SETTINGS join_algorithm = 'hash'; + +SELECT p.key, p.ts, e.value +FROM probes p +ASOF JOIN events e +ON p.key = e.key AND p.ts >= e.begin +ORDER BY p.key, p.ts ASC; + +SELECT p.key, p.ts, e.value +FROM probes p +ASOF LEFT JOIN events e +ON p.key = e.key AND p.ts >= e.begin +ORDER BY p.key, p.ts ASC NULLS FIRST; + diff --git a/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.reference b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.reference new file mode 100644 index 00000000000..4aac918c98c --- /dev/null +++ b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.reference @@ -0,0 +1,73 @@ +- +2023-03-21 12:00:00 1970-01-01 00:00:00 -1 +2023-03-21 13:00:00 1970-01-01 00:00:00 -1 +2023-03-21 14:00:00 2023-03-21 13:00:00 0 +2023-03-21 15:00:00 2023-03-21 14:00:00 1 +2023-03-21 16:00:00 2023-03-21 15:00:00 2 +2023-03-21 17:00:00 2023-03-21 16:00:00 3 +2023-03-21 18:00:00 2023-03-21 16:00:00 3 +2023-03-21 19:00:00 2023-03-21 16:00:00 3 +2023-03-21 20:00:00 2023-03-21 16:00:00 3 +2023-03-21 21:00:00 2023-03-21 16:00:00 3 +2027-10-18 11:03:27 2023-03-21 16:00:00 3 +- +2023-03-21 12:00:00 1970-01-01 00:00:00 -1 +2023-03-21 13:00:00 1970-01-01 00:00:00 -1 +2023-03-21 14:00:00 2023-03-21 13:00:00 0 +2023-03-21 15:00:00 2023-03-21 14:00:00 1 +2023-03-21 16:00:00 2023-03-21 15:00:00 2 +2023-03-21 17:00:00 2023-03-21 16:00:00 3 +2023-03-21 18:00:00 2023-03-21 16:00:00 3 +2023-03-21 19:00:00 2023-03-21 16:00:00 3 +2023-03-21 20:00:00 2023-03-21 16:00:00 3 +2023-03-21 21:00:00 2023-03-21 16:00:00 3 +2027-10-18 11:03:27 2023-03-21 16:00:00 3 +\N \N \N +2023-03-21 12:00:00 2023-03-21 13:00:00 0 +2023-03-21 13:00:00 2023-03-21 13:00:00 0 +2023-03-21 14:00:00 2023-03-21 14:00:00 1 +2023-03-21 15:00:00 2023-03-21 15:00:00 2 +2023-03-21 16:00:00 2023-03-21 16:00:00 3 +2023-03-21 17:00:00 2027-10-18 11:03:27 9 +2023-03-21 18:00:00 2027-10-18 11:03:27 9 +2023-03-21 19:00:00 2027-10-18 11:03:27 9 +2023-03-21 20:00:00 2027-10-18 11:03:27 9 +2023-03-21 21:00:00 2027-10-18 11:03:27 9 +2027-10-18 11:03:27 2027-10-18 11:03:27 9 +- +2023-03-21 12:00:00 2023-03-21 13:00:00 0 +2023-03-21 13:00:00 2023-03-21 13:00:00 0 +2023-03-21 14:00:00 2023-03-21 14:00:00 1 +2023-03-21 15:00:00 2023-03-21 15:00:00 2 +2023-03-21 16:00:00 2023-03-21 16:00:00 3 +2023-03-21 17:00:00 2027-10-18 11:03:27 9 +2023-03-21 18:00:00 2027-10-18 11:03:27 9 +2023-03-21 19:00:00 2027-10-18 11:03:27 9 +2023-03-21 20:00:00 2027-10-18 11:03:27 9 +2023-03-21 21:00:00 2027-10-18 11:03:27 9 +2027-10-18 11:03:27 2027-10-18 11:03:27 9 +\N \N \N +- +2023-03-21 12:00:00 2023-03-21 13:00:00 0 +2023-03-21 13:00:00 2023-03-21 14:00:00 1 +2023-03-21 14:00:00 2023-03-21 15:00:00 2 +2023-03-21 15:00:00 2023-03-21 16:00:00 3 +2023-03-21 16:00:00 2027-10-18 11:03:27 9 +2023-03-21 17:00:00 2027-10-18 11:03:27 9 +2023-03-21 18:00:00 2027-10-18 11:03:27 9 +2023-03-21 19:00:00 2027-10-18 11:03:27 9 +2023-03-21 20:00:00 2027-10-18 11:03:27 9 +2023-03-21 21:00:00 2027-10-18 11:03:27 9 +- +2023-03-21 12:00:00 2023-03-21 13:00:00 0 +2023-03-21 13:00:00 2023-03-21 14:00:00 1 +2023-03-21 14:00:00 2023-03-21 15:00:00 2 +2023-03-21 15:00:00 2023-03-21 16:00:00 3 +2023-03-21 16:00:00 2027-10-18 11:03:27 9 +2023-03-21 17:00:00 2027-10-18 11:03:27 9 +2023-03-21 18:00:00 2027-10-18 11:03:27 9 +2023-03-21 19:00:00 2027-10-18 11:03:27 9 +2023-03-21 20:00:00 2027-10-18 11:03:27 9 +2023-03-21 21:00:00 2027-10-18 11:03:27 9 +2027-10-18 11:03:27 \N \N +\N \N \N diff --git a/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.sql b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.sql new file mode 100644 index 00000000000..d67aa254bd6 --- /dev/null +++ b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.sql @@ -0,0 +1,66 @@ +DROP TABLE IF EXISTS events0; +DROP TABLE IF EXISTS probe0; + +SET allow_experimental_analyzer = 1; +SET join_algorithm = 'full_sorting_merge'; + +CREATE TABLE events0 ( + begin Nullable(DateTime('UTC')), + value Int32 +) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO events0 SELECT toDateTime('2023-03-21 13:00:00', 'UTC') + INTERVAL number HOUR, number FROM numbers(4); +INSERT INTO events0 VALUES (NULL, -10),('0000-01-01 00:00:00', -1), ('9999-12-31 23:59:59', 9); + +CREATE TABLE probe0 ( + begin Nullable(DateTime('UTC')) +) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO probe0 SELECT toDateTime('2023-03-21 12:00:00', 'UTC') + INTERVAl number HOUR FROM numbers(10); +INSERT INTO probe0 VALUES (NULL),('9999-12-31 23:59:59'); + +SET join_use_nulls = 1; + +SELECT '-'; +SELECT p.begin, e.begin, e.value +FROM probe0 p +ASOF JOIN events0 e +ON p.begin > e.begin +ORDER BY p.begin ASC; + +SELECT '-'; +SELECT p.begin, e.begin, e.value +FROM probe0 p +ASOF LEFT JOIN events0 e +ON p.begin > e.begin +ORDER BY p.begin ASC; + +SELECT p.begin, e.begin, e.value +FROM probe0 p +ASOF JOIN events0 e +ON p.begin <= e.begin +ORDER BY p.begin ASC; + +SELECT '-'; +SELECT p.begin, e.begin, e.value +FROM probe0 p +ASOF LEFT JOIN events0 e +ON p.begin <= e.begin +ORDER BY p.begin ASC; + +SELECT '-'; +SELECT p.begin, e.begin, e.value +FROM probe0 p +ASOF JOIN events0 e +ON p.begin < e.begin +ORDER BY p.begin ASC; + +SELECT '-'; +SELECT p.begin, e.begin, e.value +FROM probe0 p +ASOF LEFT JOIN events0 e +ON p.begin < e.begin +ORDER BY p.begin ASC; + +DROP TABLE IF EXISTS events0; +DROP TABLE IF EXISTS probe0; diff --git a/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh b/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh index 7df2118ad0c..95aef9bbc5b 100755 --- a/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh +++ b/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh @@ -6,8 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -nm -q " drop table if exists tp_1; -create table tp_1 (x Int32, y Int32, projection p (select x, y order by x)) engine = MergeTree order by y partition by intDiv(y, 100); -system stop merges tp_1; +create table tp_1 (x Int32, y Int32, projection p (select x, y order by x)) engine = MergeTree order by y partition by intDiv(y, 100) settings max_parts_to_merge_at_once=1; insert into tp_1 select number, number from numbers(3); set mutations_sync = 2; @@ -39,7 +38,6 @@ $CLICKHOUSE_CLIENT -nm -q " set send_logs_level='fatal'; drop table tp_1; restore table tp_1 from Disk('backups', '$backup_id'); -system stop merges tp_1; " | grep -o "RESTORED" $CLICKHOUSE_CLIENT -q "select count() from tp_1;" diff --git a/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.reference b/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.reference new file mode 100644 index 00000000000..ca481c7fff0 --- /dev/null +++ b/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.reference @@ -0,0 +1,2 @@ +26790 1488 +26790 1488 diff --git a/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2 b/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2 new file mode 100644 index 00000000000..49ba70c471e --- /dev/null +++ b/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2 @@ -0,0 +1,39 @@ +-- Tags: long + +SET allow_experimental_analyzer=1; + +SET session_timezone = 'UTC'; + +{% for join_algorithm in ['default', 'full_sorting_merge'] -%} + +SET join_algorithm = '{{ join_algorithm }}'; + +-- TODO: enable once USING and `join_use_nulls` is supported by `full_sorting_merge` +-- SET join_use_nulls = 1; + +WITH build AS ( + SELECT + tk.number AS k, + toDateTime('2021-01-01 00:00:00') + INTERVAL i.number SECONDS AS t, + i.number % 37 AS v + FROM numbers(3000000) AS i + CROSS JOIN numbers(2) AS tk + SETTINGS join_algorithm = 'hash', join_use_nulls = 0 +), +probe AS ( + SELECT + tk.number AS k, + toDateTime('2021-01-01 00:00:30') + INTERVAL tt.number HOUR AS t + FROM numbers(2) AS tk + CROSS JOIN numbers(toUInt32((toDateTime('2021-02-01 00:00:30') - toDateTime('2021-01-01 00:00:30')) / 3600)) AS tt + SETTINGS join_algorithm = 'hash', join_use_nulls = 0 +) +SELECT + SUM(v) AS v, + COUNT(*) AS n +FROM probe +ASOF LEFT JOIN build +USING (k, t) +; + +{% endfor -%} diff --git a/tests/queries/0_stateless/03147_asof_join_ddb_missing.reference b/tests/queries/0_stateless/03147_asof_join_ddb_missing.reference new file mode 100644 index 00000000000..11eb84463f4 --- /dev/null +++ b/tests/queries/0_stateless/03147_asof_join_ddb_missing.reference @@ -0,0 +1,10 @@ +108 +108 27 +513 +1218 +3528 +14553 +121275 +1495503 +12462525 +1249625025 diff --git a/tests/queries/0_stateless/03147_asof_join_ddb_missing.sql b/tests/queries/0_stateless/03147_asof_join_ddb_missing.sql new file mode 100644 index 00000000000..95a5f8ab3ff --- /dev/null +++ b/tests/queries/0_stateless/03147_asof_join_ddb_missing.sql @@ -0,0 +1,186 @@ +SET allow_experimental_analyzer=1; + +SET session_timezone = 'UTC'; +SET joined_subquery_requires_alias = 0; +SET allow_experimental_analyzer = 1; +SET join_algorithm = 'full_sorting_merge'; + +-- # 10 dates, 5 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(10), (SELECT number AS k FROM numbers(5)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # Coverage: Missing right side bin +WITH build AS ( + SELECT + k * 2 AS k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(10), (SELECT number AS k FROM numbers(5)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + intDiv(k, 2) AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v), COUNT(*) +FROM probe ASOF JOIN build USING (k, t); + +-- # 20 dates, 5 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(20), (SELECT number AS k FROM numbers(5)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # 30 dates, 5 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(30), (SELECT number AS k FROM numbers(5)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # 50 dates, 5 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(50), (SELECT number AS k FROM numbers(5)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # 100 dates, 5 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(100), (SELECT number AS k FROM numbers(5)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # 100 dates, 50 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(100), (SELECT number AS k FROM numbers(50)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # 1000 dates, 5 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(1000), (SELECT number AS k FROM numbers(5)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # 1000 dates, 50 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(1000), (SELECT number AS k FROM numbers(50)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # 10000 dates, 50 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(10000), (SELECT number AS k FROM numbers(50)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); diff --git a/tests/queries/0_stateless/03148_asof_join_ddb_subquery.reference b/tests/queries/0_stateless/03148_asof_join_ddb_subquery.reference new file mode 100644 index 00000000000..387a4a8f249 --- /dev/null +++ b/tests/queries/0_stateless/03148_asof_join_ddb_subquery.reference @@ -0,0 +1,4 @@ +1 1 +3 1 +6 1 +8 1 diff --git a/tests/queries/0_stateless/03148_asof_join_ddb_subquery.sql b/tests/queries/0_stateless/03148_asof_join_ddb_subquery.sql new file mode 100644 index 00000000000..2ddf0f09b1e --- /dev/null +++ b/tests/queries/0_stateless/03148_asof_join_ddb_subquery.sql @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS events; +CREATE TABLE events (begin Float64, value Int32) ENGINE = MergeTree() ORDER BY begin; + +INSERT INTO events VALUES (1, 0), (3, 1), (6, 2), (8, 3); + +SET allow_experimental_analyzer = 1; +SET join_algorithm = 'full_sorting_merge'; +SET joined_subquery_requires_alias = 0; + +SELECT + begin, + value IN ( + SELECT e1.value + FROM ( + SELECT * + FROM events e1 + WHERE e1.value = events.value + ) AS e1 + ASOF JOIN ( + SELECT number :: Float64 AS begin + FROM numbers(10) + WHERE number >= 1 AND number < 10 + ) + USING (begin) + ) +FROM events +ORDER BY begin ASC; + +DROP TABLE IF EXISTS events; diff --git a/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.reference b/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.reference new file mode 100644 index 00000000000..7cfc85d23a5 --- /dev/null +++ b/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.reference @@ -0,0 +1,56 @@ +2023-03-21 13:00:00 0 +2023-03-21 14:00:00 1 +2023-03-21 15:00:00 2 +2023-03-21 16:00:00 3 +2023-03-21 17:00:00 3 +2023-03-21 18:00:00 3 +2023-03-21 19:00:00 3 +2023-03-21 20:00:00 3 +2023-03-21 21:00:00 3 +2106-02-07 06:28:15 9 +2023-03-21 13:00:00 0 +2023-03-21 14:00:00 1 +2023-03-21 15:00:00 2 +2023-03-21 16:00:00 3 +2023-03-21 17:00:00 3 +2023-03-21 18:00:00 3 +2023-03-21 19:00:00 3 +2023-03-21 20:00:00 3 +2023-03-21 21:00:00 3 +2106-02-07 06:28:15 9 +2023-03-21 12:00:00 \N +2023-03-21 13:00:00 0 +2023-03-21 14:00:00 1 +2023-03-21 15:00:00 2 +2023-03-21 16:00:00 3 +2023-03-21 17:00:00 3 +2023-03-21 18:00:00 3 +2023-03-21 19:00:00 3 +2023-03-21 20:00:00 3 +2023-03-21 21:00:00 3 +2106-02-07 06:28:15 9 +\N \N +2023-03-21 12:00:00 0 +2023-03-21 13:00:00 0 +2023-03-21 14:00:00 1 +2023-03-21 15:00:00 2 +2023-03-21 16:00:00 3 +2023-03-21 17:00:00 3 +2023-03-21 18:00:00 3 +2023-03-21 19:00:00 3 +2023-03-21 20:00:00 3 +2023-03-21 21:00:00 3 +2106-02-07 06:28:15 9 +\N 0 +2023-03-21 12:00:00 \N +2023-03-21 13:00:00 \N +2023-03-21 14:00:00 \N +2023-03-21 15:00:00 \N +2023-03-21 16:00:00 \N +2023-03-21 17:00:00 \N +2023-03-21 18:00:00 \N +2023-03-21 19:00:00 \N +2023-03-21 20:00:00 \N +2023-03-21 21:00:00 \N +2106-02-07 06:28:15 \N +\N \N diff --git a/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.sql b/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.sql new file mode 100644 index 00000000000..cd83d62dc70 --- /dev/null +++ b/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.sql @@ -0,0 +1,95 @@ +DROP TABLE IF EXISTS events0; +DROP TABLE IF EXISTS probe0; + +SET session_timezone = 'UTC'; +SET allow_experimental_analyzer = 1; +SET join_algorithm = 'full_sorting_merge'; +SET join_use_nulls = 1; + +CREATE TABLE events0 +ENGINE = MergeTree() +ORDER BY COALESCE(begin, toDateTime('9999-12-31 23:59:59')) +AS +SELECT + toNullable(toDateTime('2023-03-21 13:00:00') + INTERVAL number HOUR) AS begin, + number AS value +FROM numbers(4); + +INSERT INTO events0 VALUES (NULL, -1), (toDateTime('9999-12-31 23:59:59'), 9); + +CREATE TABLE probe0 +ENGINE = MergeTree() +ORDER BY COALESCE(begin, toDateTime('9999-12-31 23:59:59')) +AS +SELECT + toNullable(toDateTime('2023-03-21 12:00:00') + INTERVAL number HOUR) AS begin +FROM numbers(10); + +INSERT INTO probe0 VALUES (NULL), (toDateTime('9999-12-31 23:59:59')); + +SELECT + p.begin, + e.value +FROM + probe0 p + ASOF JOIN events0 e ON p.begin >= e.begin +ORDER BY p.begin ASC; + +SELECT + p.begin, + e.value +FROM + probe0 p + ASOF JOIN events0 e USING (begin) +ORDER BY p.begin ASC +SETTINGS join_use_nulls = 0 +; + +SELECT + p.begin, + e.value +FROM + probe0 p + ASOF LEFT JOIN events0 e ON p.begin >= e.begin +ORDER BY p.begin ASC; + +SELECT + p.begin, + e.value +FROM + probe0 p + ASOF LEFT JOIN events0 e USING (begin) +ORDER BY p.begin ASC +SETTINGS join_use_nulls = 0 +; + +SELECT + p.begin, + e.value +FROM + probe0 p + ASOF RIGHT JOIN events0 e ON p.begin >= e.begin +ORDER BY e.begin ASC; -- { serverError NOT_IMPLEMENTED} + +SELECT + p.begin, + e.value +FROM + probe0 p + ASOF RIGHT JOIN events0 e USING (begin) +ORDER BY e.begin ASC; -- { serverError NOT_IMPLEMENTED} + + +SELECT + p.begin, + e.value +FROM + probe0 p + ASOF LEFT JOIN ( + SELECT * FROM events0 WHERE log(value + 5) > 10 + ) e ON p.begin + INTERVAL 2 HOUR >= e.begin + INTERVAL 1 HOUR +ORDER BY p.begin ASC; + + +DROP TABLE IF EXISTS events0; +DROP TABLE IF EXISTS probe0; diff --git a/tests/queries/0_stateless/03156_default_multiquery_split.reference b/tests/queries/0_stateless/03156_default_multiquery_split.reference new file mode 100644 index 00000000000..0f3a1baff45 --- /dev/null +++ b/tests/queries/0_stateless/03156_default_multiquery_split.reference @@ -0,0 +1,10 @@ +Syntax error +101 +102 +1 +2; +3 +4 +7 +8 +9 diff --git a/tests/queries/0_stateless/03156_default_multiquery_split.sh b/tests/queries/0_stateless/03156_default_multiquery_split.sh new file mode 100755 index 00000000000..ac64c2d093d --- /dev/null +++ b/tests/queries/0_stateless/03156_default_multiquery_split.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-ordinary-database + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +SQL_FILE_NAME=$"03156_default_multiquery_split_$$.sql" + +# The old multiquery implementation uses '\n' to split INSERT query segmentation +# this case is mainly to test the following situations +# 1. INSERT(format=Values) query: split by ';' +# 2. INSERT(format is not Values) query: split by '\n\n' instead of ';', then discard the ramaining data +# 3. INSERT(format is not Values) query: split by '\n\n', causing the trailing ';'' to be treated as the part of last value +# 4. The client uses multiquery by default, regardless of whether multiquery option is used. + +# create table test1, test2, then +# 1. insert 101, 102 into test1 +# 2. insert 1, 2; into test2, ';' will be treated as a part of a value +# 3. insert 3, 4; '6' will be treated as the next query because of the empty line, we use empty line to determine the end of insert query(format IS NOT VALUES) +# '6' will cause Syntax error +cat << EOF > "$SQL_FILE_NAME" +DROP TABLE IF EXISTS TEST1; +DROP TABLE IF EXISTS TEST2; +CREATE TABLE TEST1 (value Float64) ENGINE=MergeTree ORDER BY tuple(); +CREATE TABLE TEST2 (value String) ENGINE=MergeTree ORDER BY tuple(); +INSERT INTO TEST1 VALUES +(101), +(102); +INSERT INTO TEST2 FORMAT CSV +1 +2; + +INSERT INTO TEST2 FORMAT CSV +3 +4 + +6 +EOF + +$CLICKHOUSE_CLIENT -m < "$SQL_FILE_NAME" 2>&1 | grep -o 'Syntax error' + +# insert 7, 8, 9 into test2, because we use semicolon to determine the end of insert query(format is VALUES) +# then select all data from test1 and test2 +cat << EOF > "$SQL_FILE_NAME" +INSERT INTO TEST2 VALUES +('7'), +('8'), + +('9'); + +SELECT * FROM TEST1 ORDER BY value; +SELECT * FROM TEST2 ORDER BY value; +DROP TABLE TEST1; DROP TABLE TEST2; +EOF + +$CLICKHOUSE_CLIENT -m -n < "$SQL_FILE_NAME" + +rm "$SQL_FILE_NAME" diff --git a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh index 723b11ad620..0c585d36348 100755 --- a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh +++ b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh @@ -6,8 +6,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function test { - $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_binary_encode_types_in_binary_format=1 -q "select $1 as value format RowBinaryWithNamesAndTypes" | $CLICKHOUSE_LOCAL --input-format RowBinaryWithNamesAndTypes --input_format_binary_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" - $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_native_encode_types_in_binary_format=1 -q "select $1 as value format Native" | $CLICKHOUSE_LOCAL --input-format Native --input_format_native_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" + $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_binary_encode_types_in_binary_format=1 -q "select $1 as value format RowBinaryWithNamesAndTypes" | $CLICKHOUSE_LOCAL --input-format RowBinaryWithNamesAndTypes --input_format_binary_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" + $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_native_encode_types_in_binary_format=1 -q "select $1 as value format Native" | $CLICKHOUSE_LOCAL --input-format Native --input_format_native_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" } test "materialize(42)::UInt8" diff --git a/tests/queries/0_stateless/03174_low_cardinality_group_by_distributed_plan.reference b/tests/queries/0_stateless/03174_low_cardinality_group_by_distributed_plan.reference deleted file mode 100644 index 1508c24f410..00000000000 --- a/tests/queries/0_stateless/03174_low_cardinality_group_by_distributed_plan.reference +++ /dev/null @@ -1,55 +0,0 @@ --- { echoOn } -SELECT concatWithSeparator('.', toUInt128(6), '666' as b, materialize(toLowCardinality(8))) -FROM system.one -GROUP BY '666'; -6.666.8 -SELECT concatWithSeparator('.', toUInt128(6), '666' as b, materialize(toLowCardinality(8))) -FROM remote('127.0.0.{1,1}', 'system.one') -GROUP BY '666'; -6.666.8 --- https://github.com/ClickHouse/ClickHouse/issues/63006 -SELECT - 6, - concat(' World', toUInt128(6), 6, 6, 6, toNullable(6), materialize(toLowCardinality(toNullable(toUInt128(6))))) AS a, - concat(concat(' World', 6, toLowCardinality(6), ' World', toUInt256(6), materialize(6), 6, toNullable(6), 6, 6, NULL, 6, 6), ' World', 6, 6, 6, 6, toUInt256(6), NULL, 6, 6) AS b -FROM system.one -GROUP BY toNullable(6) - WITH ROLLUP -WITH TOTALS; -6 World666666 \N -6 World666666 \N - -6 World666666 \N -SELECT - 6, - concat(' World', toUInt128(6), 6, 6, 6, toNullable(6), materialize(toLowCardinality(toNullable(toUInt128(6))))) AS a, - concat(concat(' World', 6, toLowCardinality(6), ' World', toUInt256(6), materialize(6), 6, toNullable(6), 6, 6, NULL, 6, 6), ' World', 6, 6, 6, 6, toUInt256(6), NULL, 6, 6) AS b -FROM remote('127.0.0.1') -GROUP BY toNullable(6) - WITH ROLLUP - WITH TOTALS; -6 World666666 \N -6 World666666 \N - -6 World666666 \N --- { echoOn } -SELECT - '%', - tuple(concat('%', 1, toLowCardinality(toLowCardinality(toNullable(materialize(1)))), currentDatabase(), 101., toNullable(13), '%AS%id_02%', toNullable(toNullable(10)), toLowCardinality(toNullable(10)), 10, 10)), - (toDecimal128(99.67, 6), 36, 61, 14) -FROM dist_03174 -WHERE dummy IN (0, '255') -GROUP BY - toNullable(13), - (99.67, 61, toLowCardinality(14)); -% ('%11default10113%AS%id_02%10101010') (99.67,36,61,14) --- { echoOn } -SELECT - 38, - concat(position(concat(concat(position(concat(toUInt256(3)), 'ca', 2), 3), NULLIF(1, materialize(toLowCardinality(1)))), toLowCardinality(toNullable('ca'))), concat(NULLIF(1, 1), concat(3), toNullable(3))) -FROM set_index_not__fuzz_0 -GROUP BY - toNullable(3), - concat(concat(CAST(NULL, 'Nullable(Int8)'), toNullable(3))) -FORMAT Null -SETTINGS max_threads = 1, allow_experimental_analyzer = 1, cluster_for_parallel_replicas = 'parallel_replicas', max_parallel_replicas = 3, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_threads = 1; diff --git a/tests/queries/0_stateless/03174_low_cardinality_group_by_distributed_plan.sql b/tests/queries/0_stateless/03174_low_cardinality_group_by_distributed_plan.sql deleted file mode 100644 index d397d30e285..00000000000 --- a/tests/queries/0_stateless/03174_low_cardinality_group_by_distributed_plan.sql +++ /dev/null @@ -1,80 +0,0 @@ --- There are various tests that check that group by keys don't propagate into functions replacing const arguments --- by full (empty) columns - -DROP TABLE IF EXISTS dist_03174; -DROP TABLE IF EXISTS set_index_not__fuzz_0; - --- https://github.com/ClickHouse/ClickHouse/issues/63006 - -SET allow_experimental_analyzer=1; - --- { echoOn } -SELECT concatWithSeparator('.', toUInt128(6), '666' as b, materialize(toLowCardinality(8))) -FROM system.one -GROUP BY '666'; - -SELECT concatWithSeparator('.', toUInt128(6), '666' as b, materialize(toLowCardinality(8))) -FROM remote('127.0.0.{1,1}', 'system.one') -GROUP BY '666'; - --- https://github.com/ClickHouse/ClickHouse/issues/63006 -SELECT - 6, - concat(' World', toUInt128(6), 6, 6, 6, toNullable(6), materialize(toLowCardinality(toNullable(toUInt128(6))))) AS a, - concat(concat(' World', 6, toLowCardinality(6), ' World', toUInt256(6), materialize(6), 6, toNullable(6), 6, 6, NULL, 6, 6), ' World', 6, 6, 6, 6, toUInt256(6), NULL, 6, 6) AS b -FROM system.one -GROUP BY toNullable(6) - WITH ROLLUP -WITH TOTALS; - -SELECT - 6, - concat(' World', toUInt128(6), 6, 6, 6, toNullable(6), materialize(toLowCardinality(toNullable(toUInt128(6))))) AS a, - concat(concat(' World', 6, toLowCardinality(6), ' World', toUInt256(6), materialize(6), 6, toNullable(6), 6, 6, NULL, 6, 6), ' World', 6, 6, 6, 6, toUInt256(6), NULL, 6, 6) AS b -FROM remote('127.0.0.1') -GROUP BY toNullable(6) - WITH ROLLUP - WITH TOTALS; - --- https://github.com/ClickHouse/ClickHouse/issues/64945 --- { echoOff } -CREATE TABLE dist_03174 AS system.one ENGINE = Distributed(test_cluster_two_shards, system, one, dummy); - --- { echoOn } -SELECT - '%', - tuple(concat('%', 1, toLowCardinality(toLowCardinality(toNullable(materialize(1)))), currentDatabase(), 101., toNullable(13), '%AS%id_02%', toNullable(toNullable(10)), toLowCardinality(toNullable(10)), 10, 10)), - (toDecimal128(99.67, 6), 36, 61, 14) -FROM dist_03174 -WHERE dummy IN (0, '255') -GROUP BY - toNullable(13), - (99.67, 61, toLowCardinality(14)); - --- Parallel replicas --- { echoOff } -CREATE TABLE set_index_not__fuzz_0 -( - `name` String, - `status` Enum8('alive' = 0, 'rip' = 1), - INDEX idx_status status TYPE set(2) GRANULARITY 1 -) -ENGINE = MergeTree() -ORDER BY name; - -INSERT INTO set_index_not__fuzz_0 SELECT * FROM generateRandom() LIMIT 10; - --- { echoOn } -SELECT - 38, - concat(position(concat(concat(position(concat(toUInt256(3)), 'ca', 2), 3), NULLIF(1, materialize(toLowCardinality(1)))), toLowCardinality(toNullable('ca'))), concat(NULLIF(1, 1), concat(3), toNullable(3))) -FROM set_index_not__fuzz_0 -GROUP BY - toNullable(3), - concat(concat(CAST(NULL, 'Nullable(Int8)'), toNullable(3))) -FORMAT Null -SETTINGS max_threads = 1, allow_experimental_analyzer = 1, cluster_for_parallel_replicas = 'parallel_replicas', max_parallel_replicas = 3, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_threads = 1; - --- { echoOff } -DROP TABLE IF EXISTS dist_03174; -DROP TABLE IF EXISTS set_index_not__fuzz_0; diff --git a/tests/queries/0_stateless/03198_table_function_directory_path.reference b/tests/queries/0_stateless/03198_table_function_directory_path.reference index 19920de3d3c..74cd8c6d31f 100644 --- a/tests/queries/0_stateless/03198_table_function_directory_path.reference +++ b/tests/queries/0_stateless/03198_table_function_directory_path.reference @@ -1,3 +1,4 @@ 2 2 1 +1 diff --git a/tests/queries/0_stateless/03198_table_function_directory_path.sql b/tests/queries/0_stateless/03198_table_function_directory_path.sql index 9e2791847af..90f687ed6a3 100644 --- a/tests/queries/0_stateless/03198_table_function_directory_path.sql +++ b/tests/queries/0_stateless/03198_table_function_directory_path.sql @@ -1,5 +1,6 @@ -- Tags: no-parallel +INSERT INTO FUNCTION file('data_03198_table_function_directory_path.csv', 'csv') SELECT '1.csv' SETTINGS engine_file_truncate_on_insert=1; INSERT INTO FUNCTION file('data_03198_table_function_directory_path/1.csv', 'csv') SELECT '1.csv' SETTINGS engine_file_truncate_on_insert=1; INSERT INTO FUNCTION file('data_03198_table_function_directory_path/2.csv', 'csv') SELECT '2.csv' SETTINGS engine_file_truncate_on_insert=1; INSERT INTO FUNCTION file('data_03198_table_function_directory_path/dir/3.csv', 'csv') SELECT '3.csv' SETTINGS engine_file_truncate_on_insert=1; @@ -11,3 +12,4 @@ SELECT COUNT(*) FROM file('data_03198_table_function_directory_path/'); SELECT COUNT(*) FROM file('data_03198_table_function_directory_path/dir'); SELECT COUNT(*) FROM file('data_03198_table_function_directory_path/*/dir', 'csv'); -- { serverError 74, 636 } SELECT COUNT(*) FROM file('data_03198_table_function_directory_pat'); -- { serverError 400 } +SELECT COUNT(*) FROM file('data_03198_table_function_directory_path.csv'); diff --git a/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh index 8231691e184..c9dca46f41e 100755 --- a/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh +++ b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh @@ -2,8 +2,6 @@ # Tags: long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh @@ -17,8 +15,7 @@ function test() $CH_CLIENT -q "select v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id" $CH_CLIENT -q "select v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null, v.\`Array(Variant(String, UInt64))\`.String.null from test order by id" $CH_CLIENT -q "select id from test where v.UInt64 is null order by id" - - $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 2, NULL, number % 3 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10))) from numbers(1000000) settings min_insert_block_size_rows=100000" + $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 2, NULL, number % 3 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10))) from numbers(250000) settings min_insert_block_size_rows=100000, min_insert_block_size_bytes=0" $CH_CLIENT -q "select v, v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id format Null" $CH_CLIENT -q "select v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id format Null" $CH_CLIENT -q "select v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null, v.\`Array(Variant(String, UInt64))\`.String.null from test order by id format Null" @@ -41,4 +38,3 @@ echo "MergeTree wide" $CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" test $CH_CLIENT -q "drop table test;" - diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh index 968c9e5271f..bbe25ea72bb 100755 --- a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh +++ b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh @@ -2,8 +2,6 @@ # Tags: long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/03203_client_benchmark_options.sh b/tests/queries/0_stateless/03203_client_benchmark_options.sh index a9b9d69822b..37a1f2cd3ac 100755 --- a/tests/queries/0_stateless/03203_client_benchmark_options.sh +++ b/tests/queries/0_stateless/03203_client_benchmark_options.sh @@ -4,10 +4,23 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -t -q "SELECT sleepEachRow(2) FORMAT Null" 2>&1 | grep -q "^2\." && echo "Ok" || echo "Fail" -${CLICKHOUSE_CLIENT} --time -q "SELECT sleepEachRow(2) FORMAT Null" 2>&1 | grep -q "^2\." && echo "Ok" || echo "Fail" -${CLICKHOUSE_CLIENT} --memory-usage -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1 | grep -q "^[0-9]\+$" && echo "Ok" || echo "Fail" -${CLICKHOUSE_CLIENT} --memory-usage=none -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" # expected no output -${CLICKHOUSE_CLIENT} --memory-usage=default -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1 | grep -q "^[0-9]\+$" && echo "Ok" || echo "Fail" -${CLICKHOUSE_CLIENT} --memory-usage=readable -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1 | grep -q "^[0-9].*B$" && echo "Ok" || echo "Fail" -${CLICKHOUSE_CLIENT} --memory-usage=unknown -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1 | grep -q "BAD_ARGUMENTS" && echo "Ok" || echo "Fail" +output=$(${CLICKHOUSE_CLIENT} -t -q "SELECT sleepEachRow(2) FORMAT Null" 2>&1) +{ echo "$output" | grep -q "^2\." && echo "Ok"; } || { echo "Fail"; echo "'$output'"; } + +output=$(${CLICKHOUSE_CLIENT} --time -q "SELECT sleepEachRow(2) FORMAT Null" 2>&1) +{ echo "$output" | grep -q "^2\." && echo "Ok"; } || { echo "Fail"; echo "'$output'"; } + +output=$(${CLICKHOUSE_CLIENT} --memory-usage -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1) +{ echo "$output" | grep -q "^[0-9]\+$" && echo "Ok"; } || { echo "Fail"; echo "'$output'"; } + +output=$(${CLICKHOUSE_CLIENT} --memory-usage=none -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1) +echo -n "$output" # expected no output + +output=$(${CLICKHOUSE_CLIENT} --memory-usage=default -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1) +{ echo "$output" | grep -q "^[0-9]\+$" && echo "Ok"; } || { echo "Fail"; echo "'$output'"; } + +output=$(${CLICKHOUSE_CLIENT} --memory-usage=readable -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1) +{ echo "$output" | grep -q "^[0-9].*B$" && echo "Ok"; } || { echo "Fail"; echo "'$output'"; } + +output=$(${CLICKHOUSE_CLIENT} --memory-usage=unknown -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1) +{ echo "$output" | grep -q "BAD_ARGUMENTS" && echo "Ok"; } || { echo "Fail"; echo "'$output'"; } diff --git a/tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql new file mode 100644 index 00000000000..bb3269da597 --- /dev/null +++ b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql @@ -0,0 +1,4 @@ +CREATE TABLE t (p UInt8, x UInt64) Engine = MergeTree PARTITION BY p ORDER BY x; +INSERT INTO t SELECT 0, number FROM numbers(10) SETTINGS max_block_size = 100; +SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 0; +SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/03203_variant_convert_field_to_type_bug.reference b/tests/queries/0_stateless/03203_variant_convert_field_to_type_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03203_variant_convert_field_to_type_bug.sql b/tests/queries/0_stateless/03203_variant_convert_field_to_type_bug.sql new file mode 100644 index 00000000000..b73bb8ffa6d --- /dev/null +++ b/tests/queries/0_stateless/03203_variant_convert_field_to_type_bug.sql @@ -0,0 +1,5 @@ +set allow_experimental_variant_type=1; +set use_variant_as_common_type=1; + +SELECT * FROM numbers([tuple(1, 2), NULL], 2); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} + diff --git a/tests/queries/0_stateless/03204_format_join_on.reference b/tests/queries/0_stateless/03204_format_join_on.reference new file mode 100644 index 00000000000..846f36fcca4 --- /dev/null +++ b/tests/queries/0_stateless/03204_format_join_on.reference @@ -0,0 +1,4 @@ +SELECT * FROM t1 INNER JOIN t2 ON ((t1.x = t2.x) AND (t1.x IS NULL) AS e2) +SELECT * FROM t1 INNER JOIN t2 ON ((t1.x = t2.x) AND (t1.x IS NULL) AS e2) +SELECT * FROM t1 INNER JOIN t2 ON (t1.x = t2.x) AND ((t1.x IS NULL) AS e2) +SELECT * FROM t1 INNER JOIN t2 ON t1.x = t2.x diff --git a/tests/queries/0_stateless/03204_format_join_on.sh b/tests/queries/0_stateless/03204_format_join_on.sh new file mode 100755 index 00000000000..87b0afac042 --- /dev/null +++ b/tests/queries/0_stateless/03204_format_join_on.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# regression for the JOIN ON alias for the whole expression +phase1="$($CLICKHOUSE_FORMAT --oneline --query "SELECT * FROM t1 JOIN t2 ON ((t1.x = t2.x) AND (t1.x IS NULL) AS e2)")" +echo "$phase1" +# phase 2 +$CLICKHOUSE_FORMAT --oneline --query "$phase1" + +# other test cases +$CLICKHOUSE_FORMAT --oneline --query "SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x) AND (t1.x IS NULL AS e2)" +$CLICKHOUSE_FORMAT --oneline --query "SELECT * FROM t1 JOIN t2 ON t1.x = t2.x" diff --git a/tests/queries/0_stateless/03205_parallel_replicas_alter_select_ubsan.reference b/tests/queries/0_stateless/03205_parallel_replicas_alter_select_ubsan.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03205_parallel_replicas_alter_select_ubsan.sql b/tests/queries/0_stateless/03205_parallel_replicas_alter_select_ubsan.sql new file mode 100644 index 00000000000..2ec9368327d --- /dev/null +++ b/tests/queries/0_stateless/03205_parallel_replicas_alter_select_ubsan.sql @@ -0,0 +1,35 @@ +SET alter_sync = 2; +SET max_parallel_replicas = 3, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_for_non_replicated_merge_tree = true; + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t1__fuzz_26; + +CREATE TABLE t1__fuzz_26 (`a` Nullable(Float64), `b` Nullable(Float32), `pk` Int64) ENGINE = MergeTree ORDER BY pk; +CREATE TABLE t1 ( a Float64, b Int64, pk String) Engine = MergeTree() ORDER BY pk; + +ALTER TABLE t1 + (MODIFY COLUMN `a` Float64 TTL toDateTime(b) + toIntervalMonth(viewExplain('EXPLAIN', 'actions = 1', ( + SELECT + toIntervalMonth(1), + 2 + FROM t1__fuzz_26 + GROUP BY + toFixedString('%Prewhere%', 10), + toNullable(12) + WITH ROLLUP + )), 1)) settings allow_experimental_parallel_reading_from_replicas = 1; -- { serverError INCORRECT_RESULT_OF_SCALAR_SUBQUERY } + +ALTER TABLE t1 + (MODIFY COLUMN `a` Float64 TTL toDateTime(b) + toIntervalMonth(viewExplain('EXPLAIN', 'actions = 1', ( + SELECT + toIntervalMonth(1), + 2 + FROM t1__fuzz_26 + GROUP BY + toFixedString('%Prewhere%', 10), + toNullable(12) + WITH ROLLUP + )), 1)) settings allow_experimental_parallel_reading_from_replicas = 0; -- { serverError INCORRECT_RESULT_OF_SCALAR_SUBQUERY } + +DROP TABLE t1; +DROP TABLE t1__fuzz_26; diff --git a/tests/queries/0_stateless/03206_no_exceptions_clickhouse_local.reference b/tests/queries/0_stateless/03206_no_exceptions_clickhouse_local.reference new file mode 100644 index 00000000000..11277a62b06 --- /dev/null +++ b/tests/queries/0_stateless/03206_no_exceptions_clickhouse_local.reference @@ -0,0 +1 @@ +Hello world diff --git a/tests/queries/0_stateless/03206_no_exceptions_clickhouse_local.sh b/tests/queries/0_stateless/03206_no_exceptions_clickhouse_local.sh new file mode 100755 index 00000000000..00efd1f4591 --- /dev/null +++ b/tests/queries/0_stateless/03206_no_exceptions_clickhouse_local.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +CLICKHOUSE_TERMINATE_ON_ANY_EXCEPTION=1 ${CLICKHOUSE_LOCAL} --query "SELECT * FROM table" --input-format CSV <<<"Hello, world" diff --git a/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.reference b/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.reference new file mode 100644 index 00000000000..e84856c90fd --- /dev/null +++ b/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.reference @@ -0,0 +1,13 @@ +010101 AggregateFunction(groupArrayIntersect, Array(UInt8)) +[1] +1 [2,4,6,8,10] +2 [2,4,6,8,10] +3 [2,4,6,8,10] +5 [2,6,10] +6 [10] +7 [] +a [(['2','4','6','8','10'])] +b [(['2','4','6','8','10'])] +c [(['2','4','6','8','10'])] +d [] +e [] diff --git a/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.sql b/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.sql new file mode 100644 index 00000000000..1b3d48ce0c3 --- /dev/null +++ b/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.sql @@ -0,0 +1,43 @@ +SELECT hex(groupArrayIntersectState([1]) AS a), toTypeName(a); +SELECT finalizeAggregation(CAST(unhex('010101'), 'AggregateFunction(groupArrayIntersect, Array(UInt8))')); + +DROP TABLE IF EXISTS grouparray; +CREATE TABLE grouparray +( + `v` AggregateFunction(groupArrayIntersect, Array(UInt8)) +) +ENGINE = Log; + +INSERT INTO grouparray Select groupArrayIntersectState([2, 4, 6, 8, 10]::Array(UInt8)); +SELECT '1', arraySort(groupArrayIntersectMerge(v)) FROM grouparray; +INSERT INTO grouparray Select groupArrayIntersectState([2, 4, 6, 8, 10]::Array(UInt8)); +SELECT '2', arraySort(groupArrayIntersectMerge(v)) FROM grouparray; +INSERT INTO grouparray Select groupArrayIntersectState([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]::Array(UInt8)); +SELECT '3', arraySort(groupArrayIntersectMerge(v)) FROM grouparray; +INSERT INTO grouparray Select groupArrayIntersectState([2, 6, 10]::Array(UInt8)); +SELECT '5', arraySort(groupArrayIntersectMerge(v)) FROM grouparray; +INSERT INTO grouparray Select groupArrayIntersectState([10]::Array(UInt8)); +SELECT '6', arraySort(groupArrayIntersectMerge(v)) FROM grouparray; +INSERT INTO grouparray Select groupArrayIntersectState([]::Array(UInt8)); +SELECT '7', arraySort(groupArrayIntersectMerge(v)) FROM grouparray; + +DROP TABLE IF EXISTS grouparray; + + +DROP TABLE IF EXISTS grouparray_string; +CREATE TABLE grouparray_string +( + `v` AggregateFunction(groupArrayIntersect, Array(Tuple(Array(String)))) +) +ENGINE = Log; + +INSERT INTO grouparray_string Select groupArrayIntersectState([tuple(['2', '4', '6', '8', '10'])]); +SELECT 'a', arraySort(groupArrayIntersectMerge(v)) FROM grouparray_string; +INSERT INTO grouparray_string Select groupArrayIntersectState([tuple(['2', '4', '6', '8', '10']), tuple(['2', '4', '6', '8', '10'])]); +SELECT 'b', arraySort(groupArrayIntersectMerge(v)) FROM grouparray_string; +INSERT INTO grouparray_string Select groupArrayIntersectState([tuple(['2', '4', '6', '8', '10']), tuple(['2', '4', '6', '8', '10', '14'])]); +SELECT 'c', arraySort(groupArrayIntersectMerge(v)) FROM grouparray_string; +INSERT INTO grouparray_string Select groupArrayIntersectState([tuple(['2', '4', '6', '8', '10', '20']), tuple(['2', '4', '6', '8', '10', '14'])]); +SELECT 'd', arraySort(groupArrayIntersectMerge(v)) FROM grouparray_string; +INSERT INTO grouparray_string Select groupArrayIntersectState([]::Array(Tuple(Array(String)))); +SELECT 'e', arraySort(groupArrayIntersectMerge(v)) FROM grouparray_string; diff --git a/tests/queries/0_stateless/03208_inconsistent_formatting_of_not_subquery.reference b/tests/queries/0_stateless/03208_inconsistent_formatting_of_not_subquery.reference new file mode 100644 index 00000000000..a1afeb1ab82 --- /dev/null +++ b/tests/queries/0_stateless/03208_inconsistent_formatting_of_not_subquery.reference @@ -0,0 +1 @@ +SELECT NOT ((SELECT 1)) diff --git a/tests/queries/0_stateless/03208_inconsistent_formatting_of_not_subquery.sh b/tests/queries/0_stateless/03208_inconsistent_formatting_of_not_subquery.sh new file mode 100755 index 00000000000..594d316b621 --- /dev/null +++ b/tests/queries/0_stateless/03208_inconsistent_formatting_of_not_subquery.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_FORMAT --oneline --query "SELECT NOT((SELECT 1))" diff --git a/tests/queries/0_stateless/03208_numbers_total_rows_approx.reference b/tests/queries/0_stateless/03208_numbers_total_rows_approx.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/03208_numbers_total_rows_approx.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/03208_numbers_total_rows_approx.sql b/tests/queries/0_stateless/03208_numbers_total_rows_approx.sql new file mode 100644 index 00000000000..7855dfb6207 --- /dev/null +++ b/tests/queries/0_stateless/03208_numbers_total_rows_approx.sql @@ -0,0 +1 @@ +SELECT number FROM numbers(2, 1) WHERE number % 2 = 0 SETTINGS max_rows_to_read = 10; diff --git a/tests/queries/0_stateless/03209_functions_json_msan_fuzzer_issue.reference b/tests/queries/0_stateless/03209_functions_json_msan_fuzzer_issue.reference new file mode 100644 index 00000000000..e02f3666d40 --- /dev/null +++ b/tests/queries/0_stateless/03209_functions_json_msan_fuzzer_issue.reference @@ -0,0 +1 @@ +0 0 0 1.1 diff --git a/tests/queries/0_stateless/03209_functions_json_msan_fuzzer_issue.sql b/tests/queries/0_stateless/03209_functions_json_msan_fuzzer_issue.sql new file mode 100644 index 00000000000..a05b07d5971 --- /dev/null +++ b/tests/queries/0_stateless/03209_functions_json_msan_fuzzer_issue.sql @@ -0,0 +1,2 @@ +WITH '{ "v":1.1}' AS raw SELECT JSONExtract(raw, 'float') AS float32_1, JSONExtract(concat(tuple('1970-01-05', 10, materialize(10), 10, 10, 10, toUInt256(10), 10, toNullable(10), 10, 10), materialize(toUInt256(0)), ', ', 2, 2, toLowCardinality(toLowCardinality(2))), raw, toLowCardinality('v'), 'Float32') AS float32_2, JSONExtractFloat(raw) AS float64_1, JSONExtract(raw, 'v', 'double') AS float64_2; + diff --git a/tests/queries/0_stateless/03209_parallel_replicas_order_by_all.reference b/tests/queries/0_stateless/03209_parallel_replicas_order_by_all.reference new file mode 100644 index 00000000000..fd453d088a6 --- /dev/null +++ b/tests/queries/0_stateless/03209_parallel_replicas_order_by_all.reference @@ -0,0 +1,12 @@ +-- { echoOn } +SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = 0, allow_experimental_parallel_reading_from_replicas=0; +B 3 10 +D 1 20 +A 2 30 +C \N 40 +SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = 0, allow_experimental_parallel_reading_from_replicas=1; +B 3 10 +D 1 20 +A 2 30 +C \N 40 +DROP TABLE order_by_all SYNC; diff --git a/tests/queries/0_stateless/03209_parallel_replicas_order_by_all.sql b/tests/queries/0_stateless/03209_parallel_replicas_order_by_all.sql new file mode 100644 index 00000000000..eb3cddbbe07 --- /dev/null +++ b/tests/queries/0_stateless/03209_parallel_replicas_order_by_all.sql @@ -0,0 +1,20 @@ +DROP TABLE IF EXISTS order_by_all SYNC; +CREATE TABLE order_by_all +( + a String, + b Nullable(Int32), + all UInt64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/{database}/test_03210', 'r1') ORDER BY tuple(); + +INSERT INTO order_by_all VALUES ('B', 3, 10), ('C', NULL, 40), ('D', 1, 20), ('A', 2, 30); + +SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, cluster_for_parallel_replicas='parallel_replicas'; +SET allow_experimental_analyzer=1; -- fix has been done only for the analyzer +SET enable_order_by_all = 0; + +-- { echoOn } +SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = 0, allow_experimental_parallel_reading_from_replicas=0; +SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = 0, allow_experimental_parallel_reading_from_replicas=1; + +DROP TABLE order_by_all SYNC; diff --git a/tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.reference b/tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.reference new file mode 100644 index 00000000000..3d6a23045fb --- /dev/null +++ b/tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.reference @@ -0,0 +1,3 @@ +1 tx1 US +1 tx2 US +1 tx3 US diff --git a/tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.sql b/tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.sql new file mode 100644 index 00000000000..599875e90cf --- /dev/null +++ b/tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.sql @@ -0,0 +1,33 @@ +DROP TABLE IF EXISTS user_country; +DROP TABLE IF EXISTS user_transactions; + +CREATE TABLE user_country ( + user_id UInt64, + country String +) +ENGINE = ReplacingMergeTree +ORDER BY user_id; + +CREATE TABLE user_transactions ( + user_id UInt64, + transaction_id String +) +ENGINE = MergeTree +ORDER BY user_id; + +INSERT INTO user_country (user_id, country) VALUES (1, 'US'); +INSERT INTO user_transactions (user_id, transaction_id) VALUES (1, 'tx1'), (1, 'tx2'), (1, 'tx3'), (2, 'tx1'); + +-- Expected 3 rows, got only 1. Removing 'ANY' and adding 'FINAL' fixes +-- the issue (but it is not always possible). Moving filter by 'country' to +-- an outer query doesn't help. Query without filter by 'country' works +-- as expected (returns 3 rows). +SELECT * FROM user_transactions +ANY LEFT JOIN user_country USING (user_id) +WHERE + user_id = 1 + AND country = 'US' +ORDER BY ALL; + +DROP TABLE user_country; +DROP TABLE user_transactions; diff --git a/tests/queries/0_stateless/03210_fix_single_value_data_assertion.reference b/tests/queries/0_stateless/03210_fix_single_value_data_assertion.reference new file mode 100644 index 00000000000..d8f7e13db55 --- /dev/null +++ b/tests/queries/0_stateless/03210_fix_single_value_data_assertion.reference @@ -0,0 +1,12 @@ +0 1 1 1 0 0 0 +1 3 3 3 2 2 2 +2 5 5 5 4 4 4 +3 7 7 7 6 6 6 +4 9 9 9 8 8 8 +5 11 11 11 10 10 10 +6 13 13 13 12 12 12 +7 15 15 15 14 14 14 +8 17 17 17 16 16 16 +9 19 19 19 18 18 18 + +0 107351244 107351244 107351244 107354520 107354520 107354520 diff --git a/tests/queries/0_stateless/03210_fix_single_value_data_assertion.sql b/tests/queries/0_stateless/03210_fix_single_value_data_assertion.sql new file mode 100644 index 00000000000..a1243ef0b25 --- /dev/null +++ b/tests/queries/0_stateless/03210_fix_single_value_data_assertion.sql @@ -0,0 +1,19 @@ +SELECT + intDiv(number, 2) AS k, + sumArgMax(number, number % 20), + sumArgMax(number, leftPad(toString(number % 20), 5, '0')), -- Pad with 0 to preserve number ordering + sumArgMax(number, [number % 20, number % 20]), + sumArgMin(number, number % 20), + sumArgMin(number, leftPad(toString(number % 20), 5, '0')), + sumArgMin(number, [number % 20, number % 20]), +FROM +( + SELECT number + FROM system.numbers + LIMIT 65537 +) +GROUP BY k + WITH TOTALS +ORDER BY k ASC + LIMIT 10 +SETTINGS group_by_overflow_mode = 'any', totals_mode = 'before_having', max_rows_to_group_by = 100000; diff --git a/tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.reference b/tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.reference new file mode 100644 index 00000000000..836b526905a --- /dev/null +++ b/tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.reference @@ -0,0 +1,7 @@ +ALTER TABLE columns_with_multiple_streams MODIFY COLUMN `field1` Nullable(tupleElement(x, 2), UInt8) +ALTER TABLE t_update_empty_nested ADD COLUMN `nested.arr2` Array(tuple('- ON NULL -', toLowCardinality(11), 11, 11, toLowCardinality(11), 11), UInt64) +ALTER TABLE t ADD COLUMN `x` Array(tuple(1), UInt8) +ALTER TABLE enum_alter_issue MODIFY COLUMN `a` Enum8(equals('one', timeSlots(timeSlots(arrayEnumerateDense(tuple('0.2147483646', toLowCardinality(toUInt128)), NULL), 4, 12.34, materialize(73), 2)), 1)) +ALTER TABLE t_sparse_mutations_3 MODIFY COLUMN `s` Tuple(Nullable(tupleElement(s, 1), UInt64), Nullable(UInt64), Nullable(UInt64), Nullable(UInt64), Nullable(String)) +Syntax error +Syntax error diff --git a/tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.sh b/tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.sh new file mode 100755 index 00000000000..86c7a5469ca --- /dev/null +++ b/tests/queries/0_stateless/03210_inconsistent_formatting_of_data_types.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Ensure that these (possibly incorrect) queries can at least be parsed back after formatting. +$CLICKHOUSE_FORMAT --oneline --query "ALTER TABLE columns_with_multiple_streams MODIFY COLUMN field1 Nullable(tupleElement(x, 2), UInt8)" | $CLICKHOUSE_FORMAT --oneline +$CLICKHOUSE_FORMAT --oneline --query "ALTER TABLE t_update_empty_nested ADD COLUMN \`nested.arr2\` Array(tuple('- ON NULL -', toLowCardinality(11), 11, 11, toLowCardinality(11), 11), UInt64)" | $CLICKHOUSE_FORMAT --oneline +$CLICKHOUSE_FORMAT --oneline --query "ALTER TABLE t ADD COLUMN x Array((1), UInt8)" | $CLICKHOUSE_FORMAT --oneline +$CLICKHOUSE_FORMAT --oneline --query "ALTER TABLE enum_alter_issue (MODIFY COLUMN a Enum8(equals('one', timeSlots(timeSlots(arrayEnumerateDense(tuple('0.2147483646', toLowCardinality(toUInt128(12))), NULL), 4, 12.34, materialize(73), 2)), 1)))" | $CLICKHOUSE_FORMAT --oneline +$CLICKHOUSE_FORMAT --oneline --query "ALTER TABLE t_sparse_mutations_3 MODIFY COLUMN s Tuple(Nullable(tupleElement(s, 1), UInt64), Nullable(UInt64), Nullable(UInt64), Nullable(UInt64), Nullable(String))" | $CLICKHOUSE_FORMAT --oneline + +# These invalid queries don't parse and this is normal. +$CLICKHOUSE_FORMAT --oneline --query "ALTER TABLE alter_compression_codec1 MODIFY COLUMN alter_column CODEC((2 + ignore(1, toUInt128(materialize(2)), 2 + toNullable(toNullable(3))), 3), NONE)" 2>&1 | grep -o -F 'Syntax error' +$CLICKHOUSE_FORMAT --oneline --query "ALTER TABLE test_table ADD COLUMN \`array\` Array(('110', 3, toLowCardinality(3), 3, toNullable(3), toLowCardinality(toNullable(3)), 3), UInt8) DEFAULT [1, 2, 3]" 2>&1 | grep -o -F 'Syntax error' diff --git a/tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.reference b/tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.reference new file mode 100644 index 00000000000..d717a29ab23 --- /dev/null +++ b/tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.reference @@ -0,0 +1,19 @@ +DATA + ┏━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━┓ + ┃ c0 ┃ c1 ┃ c2 ┃ + ┡━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━┩ +1. │ 826636805 │ 0 │ │ + ├───────────┼───────────┼────┤ +2. │ 0 │ 150808457 │ │ + └───────────┴───────────┴────┘ +NUMBER OF ROWS IN FIRST SHOULD BE EQUAL TO SECOND +FISRT + +SECOND +1 +TO DEBUG I TOOK JUST A SUBQUERY AND IT HAS 1 ROW +THIRD +1 +AND I ADDED SINGLE CONDITION THAT CONDITION <>0 THAT IS 1 IN THIRD QUERY AND IT HAS NO RESULT!!! +FOURTH +1 diff --git a/tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.sql b/tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.sql new file mode 100644 index 00000000000..77b1d52dd18 --- /dev/null +++ b/tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.sql @@ -0,0 +1,45 @@ +DROP TABLE IF EXISTS t0; + +CREATE TABLE t0 (c0 Int32, c1 Int32, c2 String) ENGINE = Log() ; +INSERT INTO t0(c0, c1, c2) VALUES (826636805,0, ''), (0, 150808457, ''); + +SELECT 'DATA'; +SELECT * FROM t0 FORMAT PrettyMonoBlock; + +SELECT 'NUMBER OF ROWS IN FIRST SHOULD BE EQUAL TO SECOND'; + + +SELECT 'FISRT'; +SELECT left.c2 FROM t0 AS left +LEFT ANTI JOIN t0 AS right_0 ON ((left.c0)=(right_0.c1)) +WHERE (abs ((- ((sign (right_0.c1)))))); + +SELECT 'SECOND'; +SELECT SUM(check <> 0) +FROM +( + SELECT (abs ((- ((sign (right_0.c1)))))) AS `check` + FROM t0 AS left + LEFT ANTI JOIN t0 AS right_0 ON ((left.c0)=(right_0.c1)) +); + + +SELECT 'TO DEBUG I TOOK JUST A SUBQUERY AND IT HAS 1 ROW'; + +SELECT 'THIRD'; + +SELECT (abs ((- ((sign (right_0.c1)))))) AS `check` +FROM t0 AS left +LEFT ANTI JOIN t0 AS right_0 ON ((left.c0)=(right_0.c1)); + + +SELECT 'AND I ADDED SINGLE CONDITION THAT CONDITION <>0 THAT IS 1 IN THIRD QUERY AND IT HAS NO RESULT!!!'; + + +SELECT 'FOURTH'; +SELECT (abs ((- ((sign (right_0.c1)))))) AS `check` +FROM t0 AS left +LEFT ANTI JOIN t0 AS right_0 ON ((left.c0)=(right_0.c1)) +WHERE check <> 0; + +DROP TABLE t0; diff --git a/tests/queries/0_stateless/03212_optimize_with_constraints_logical_error.reference b/tests/queries/0_stateless/03212_optimize_with_constraints_logical_error.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03212_optimize_with_constraints_logical_error.sql b/tests/queries/0_stateless/03212_optimize_with_constraints_logical_error.sql new file mode 100644 index 00000000000..16a27af986b --- /dev/null +++ b/tests/queries/0_stateless/03212_optimize_with_constraints_logical_error.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +EXPLAIN SYNTAX +WITH 1 AS compound_value SELECT * APPLY (x -> compound_value.*) +FROM test_table WHERE x > 0 +SETTINGS convert_query_to_cnf = true, optimize_using_constraints = true, optimize_substitute_columns = true; -- { serverError UNKNOWN_IDENTIFIER } + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/03212_thousand_exceptions.reference b/tests/queries/0_stateless/03212_thousand_exceptions.reference new file mode 100644 index 00000000000..2409ba16fcb --- /dev/null +++ b/tests/queries/0_stateless/03212_thousand_exceptions.reference @@ -0,0 +1 @@ +('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.')('.') \ No newline at end of file diff --git a/tests/queries/0_stateless/03212_thousand_exceptions.sh b/tests/queries/0_stateless/03212_thousand_exceptions.sh new file mode 100755 index 00000000000..0a6abf35c10 --- /dev/null +++ b/tests/queries/0_stateless/03212_thousand_exceptions.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# This should not be too slow, even under sanitizers. +yes "SELECT throwIf(1); SELECT '.' FORMAT Values;" | head -n 1000 | $CLICKHOUSE_CLIENT --multiquery --ignore-error 2>/dev/null diff --git a/tests/queries/0_stateless/03213_array_element_msan.reference b/tests/queries/0_stateless/03213_array_element_msan.reference new file mode 100644 index 00000000000..6844e39d0aa --- /dev/null +++ b/tests/queries/0_stateless/03213_array_element_msan.reference @@ -0,0 +1,5 @@ +10 +-- system.settings_profiles +-- system.settings_profiles +-- system.settings_profiles +-- system.settings_profiles diff --git a/tests/queries/0_stateless/03213_array_element_msan.sql b/tests/queries/0_stateless/03213_array_element_msan.sql new file mode 100644 index 00000000000..68955856792 --- /dev/null +++ b/tests/queries/0_stateless/03213_array_element_msan.sql @@ -0,0 +1,2 @@ +SELECT [[10, 2, 13, 15][toNullable(toLowCardinality(1))]][materialize(toLowCardinality(1))]; +SELECT '-- system.settings_profiles' GROUP BY [[[[[[[[[[10, toNullable(10)][1], [materialize(toLowCardinality(10)), 2][materialize(toLowCardinality(1))]][1]][materialize(materialize(1))], [10, 2, 1][1]][1]][1], 1][toLowCardinality(1)]][1], 1][1], 10][1], [[10, toLowCardinality(2)][toNullable(toLowCardinality(1))]][materialize(toLowCardinality(1))]][1], [[[[10, 2][1]][1]][1], [10, 2][materialize(1)], [[[2][1]][materialize(1)], 2, 1][1], [2, 10, toNullable(1)][1]] WITH CUBE; diff --git a/tests/queries/0_stateless/03213_distributed_analyzer.reference b/tests/queries/0_stateless/03213_distributed_analyzer.reference new file mode 100644 index 00000000000..9d63c0a7a5e --- /dev/null +++ b/tests/queries/0_stateless/03213_distributed_analyzer.reference @@ -0,0 +1 @@ +['digraph','{',' rankdir="LR";',' { node [shape = rect]',' n1 [label="Remote"];',' }','}','digraph','{',' rankdir="LR";',' { node [shape = rect]',' n1 [label="Remote"];',' }','}'] diff --git a/tests/queries/0_stateless/03213_distributed_analyzer.sql b/tests/queries/0_stateless/03213_distributed_analyzer.sql new file mode 100644 index 00000000000..ae0f13f08f0 --- /dev/null +++ b/tests/queries/0_stateless/03213_distributed_analyzer.sql @@ -0,0 +1,3 @@ +-- This triggered a nullptr dereference due to the confusion between old and new analyzers: +SELECT sum(*) FROM remote('127.0.0.4', currentDatabase(), viewExplain('EXPLAIN PIPELINE', 'graph = 1', (SELECT * FROM remote('127.0.0.4', system, one)))); -- { serverError UNKNOWN_FUNCTION } +SELECT groupArray(*) FROM cluster(test_cluster_two_shards, viewExplain('EXPLAIN PIPELINE', 'graph = 1', (SELECT * FROM remote('127.0.0.4', system, one)))); diff --git a/tests/queries/0_stateless/03213_rand_dos.reference b/tests/queries/0_stateless/03213_rand_dos.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03213_rand_dos.sql b/tests/queries/0_stateless/03213_rand_dos.sql new file mode 100644 index 00000000000..1250995bfe5 --- /dev/null +++ b/tests/queries/0_stateless/03213_rand_dos.sql @@ -0,0 +1,5 @@ +SELECT randChiSquared(-0.0000001); -- { serverError BAD_ARGUMENTS } +SELECT randChiSquared(-0.0); -- { serverError BAD_ARGUMENTS } +SELECT randStudentT(-0.); -- { serverError BAD_ARGUMENTS } +SELECT randFisherF(-0., 1); -- { serverError BAD_ARGUMENTS } +SELECT randFisherF(1, -0.); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index 1a86cd9f8db..05651531fba 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -51,6 +51,14 @@ function check_replication_consistency() table_name_prefix=$1 check_query_part=$2 + # Try to kill some mutations because sometimes tests run too much (it's not guarenteed to kill all mutations, see below) + # Try multiple replicas, because queries are not finished yet, and "global" KILL MUTATION may fail due to another query (like DROP TABLE) + readarray -t tables_arr < <(${CLICKHOUSE_CLIENT} -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$table_name_prefix%'") + for t in "${tables_arr[@]}" + do + ${CLICKHOUSE_CLIENT} -q "KILL MUTATION WHERE database=currentDatabase() AND table='$t'" > /dev/null 2>/dev/null ||: + done + # Wait for all queries to finish (query may still be running if thread is killed by timeout) num_tries=0 while [[ $($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE current_database=currentDatabase() AND query LIKE '%$table_name_prefix%'") -ne 1 ]]; do @@ -96,7 +104,7 @@ function check_replication_consistency() some_table=$($CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$table_name_prefix%' ORDER BY rand() LIMIT 1") $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA $some_table PULL" 1>/dev/null 2>/dev/null ||: - # Forcefully cancel mutations to avoid waiting for them to finish + # Forcefully cancel mutations to avoid waiting for them to finish. Kills the remaining mutations ${CLICKHOUSE_CLIENT} -q "KILL MUTATION WHERE database=currentDatabase() AND table like '$table_name_prefix%'" > /dev/null # SYNC REPLICA is not enough if some MUTATE_PARTs are not assigned yet diff --git a/tests/queries/1_stateful/00162_mmap_compression_none.sql b/tests/queries/1_stateful/00162_mmap_compression_none.sql index d2cbcea8aaa..48d6ada821e 100644 --- a/tests/queries/1_stateful/00162_mmap_compression_none.sql +++ b/tests/queries/1_stateful/00162_mmap_compression_none.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS hits_none; CREATE TABLE hits_none (Title String CODEC(NONE)) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -INSERT INTO hits_none SELECT Title FROM test.hits; +INSERT INTO hits_none SELECT Title FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16; SET min_bytes_to_use_mmap_io = 1; SELECT sum(length(Title)) FROM hits_none; diff --git a/tests/queries/1_stateful/00180_no_seek_avoiding_when_reading_from_cache.sh b/tests/queries/1_stateful/00180_no_seek_avoiding_when_reading_from_cache.sh index 2e1b807c496..f9fea2c1dad 100755 --- a/tests/queries/1_stateful/00180_no_seek_avoiding_when_reading_from_cache.sh +++ b/tests/queries/1_stateful/00180_no_seek_avoiding_when_reading_from_cache.sh @@ -17,11 +17,11 @@ $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE hits_s3_sampled FINAL" $CLICKHOUSE_CLIENT -q "SYSTEM DROP FILESYSTEM CACHE" # Warm up the cache -$CLICKHOUSE_CLIENT -q "SELECT * FROM hits_s3_sampled WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10 FORMAT Null" -$CLICKHOUSE_CLIENT -q "SELECT * FROM hits_s3_sampled WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10 FORMAT Null" +$CLICKHOUSE_CLIENT -q "SELECT * FROM hits_s3_sampled WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10 FORMAT Null SETTINGS filesystem_cache_reserve_space_wait_lock_timeout_milliseconds=2000" +$CLICKHOUSE_CLIENT -q "SELECT * FROM hits_s3_sampled WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10 FORMAT Null SETTINGS filesystem_cache_reserve_space_wait_lock_timeout_milliseconds=2000" query_id=02906_read_from_cache_$RANDOM -$CLICKHOUSE_CLIENT --query_id ${query_id} -q "SELECT * FROM hits_s3_sampled WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10 FORMAT Null" +$CLICKHOUSE_CLIENT --query_id ${query_id} -q "SELECT * FROM hits_s3_sampled WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10 FORMAT Null SETTINGS filesystem_cache_reserve_space_wait_lock_timeout_milliseconds=2000" $CLICKHOUSE_CLIENT -nq " SYSTEM FLUSH LOGS; diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh index ef2d89f0218..f7017958635 100644 --- a/tests/queries/shell_config.sh +++ b/tests/queries/shell_config.sh @@ -132,7 +132,7 @@ export CLICKHOUSE_URL_INTERSERVER=${CLICKHOUSE_URL_INTERSERVER:="${CLICKHOUSE_PO export CLICKHOUSE_CURL_COMMAND=${CLICKHOUSE_CURL_COMMAND:="curl"} # The queries in CI are prone to sudden delays, and we often don't check for curl # errors, so it makes sense to set a relatively generous timeout. -export CLICKHOUSE_CURL_TIMEOUT=${CLICKHOUSE_CURL_TIMEOUT:="60"} +export CLICKHOUSE_CURL_TIMEOUT=${CLICKHOUSE_CURL_TIMEOUT:="120"} export CLICKHOUSE_CURL=${CLICKHOUSE_CURL:="${CLICKHOUSE_CURL_COMMAND} -q -s --max-time ${CLICKHOUSE_CURL_TIMEOUT}"} export CLICKHOUSE_TMP=${CLICKHOUSE_TMP:="."} mkdir -p ${CLICKHOUSE_TMP} diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 862f38976ce..b21ae0764c6 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1622,6 +1622,7 @@ filesystem filesystemAvailable filesystemCapacity filesystemFree +filesystemUnreserved filesystems finalizeAggregation fips @@ -1865,9 +1866,11 @@ jdbc jemalloc jeprof joinGet +joinGetOrNull json jsonMergePatch jsonasstring +jsonasobject jsoncolumns jsoncolumnsmonoblock jsoncompact @@ -2541,6 +2544,7 @@ sqlite sqrt src srcReplicas +sshkey stackoverflow stacktrace stacktraces diff --git a/utils/corrector_utf8/CMakeLists.txt b/utils/corrector_utf8/CMakeLists.txt index 4744dd5e0a5..17f047a8cff 100644 --- a/utils/corrector_utf8/CMakeLists.txt +++ b/utils/corrector_utf8/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable(corrector_utf8 corrector_utf8.cpp) -target_link_libraries(corrector_utf8 PRIVATE clickhouse_common_io) +target_link_libraries(corrector_utf8 PRIVATE clickhouse_common_io clickhouse_common_config)