diff --git a/.github/actions/release/action.yml b/.github/actions/release/action.yml
index 99ec02662f6..c3897682a33 100644
--- a/.github/actions/release/action.yml
+++ b/.github/actions/release/action.yml
@@ -62,8 +62,8 @@ runs:
       if: ${{ inputs.type == 'patch' }}
       shell: bash
       run: |
-        python3 ./tests/ci/create_release.py --set-progress-started --progress "update ChangeLog"
-        [ "$(git branch --show-current)" != "master" ] && echo "not on the master" && exit 1
+        git checkout master
+        python3 ./tests/ci/create_release.py --set-progress-started --progress "update changelog, docker version, security"
         echo "List versions"
         ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
         echo "Update docker version"
@@ -96,17 +96,13 @@ runs:
           Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }}
           ### Changelog category (leave one):
           - Not for changelog (changelog entry is not required)
-    - name: Reset changes if Dry-run
-      if: ${{ inputs.dry-run }}
+    - name: Complete previous steps and Restore git state
+      if: ${{ inputs.type == 'patch' }}
       shell: bash
       run: |
-        git reset --hard HEAD
-    - name: Checkout back to GITHUB_REF
-      shell: bash
-      run: |
-        git checkout "$GITHUB_REF_NAME"
-        # set current progress to OK
         python3 ./tests/ci/create_release.py --set-progress-completed
+        git reset --hard HEAD
+        git checkout "$GITHUB_REF_NAME"
     - name: Create GH Release
       shell: bash
       if: ${{ inputs.type == 'patch' }}
@@ -146,24 +142,23 @@ runs:
      if: ${{ inputs.type == 'patch' }}
       shell: bash
       run: |
-        python3 ./tests/ci/create_release.py --set-progress-started --progress "docker server release"
         cd "./tests/ci"
+        python3 ./create_release.py --set-progress-started --progress "docker server release"
         export CHECK_NAME="Docker server image"
         python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
-        python3 ./tests/ci/create_release.py --set-progress-completed
+        python3 ./create_release.py --set-progress-completed
     - name: Docker clickhouse/clickhouse-keeper building
       if: ${{ inputs.type == 'patch' }}
       shell: bash
       run: |
-        python3 ./tests/ci/create_release.py --set-progress-started --progress "docker keeper release"
         cd "./tests/ci"
+        python3 ./create_release.py --set-progress-started --progress "docker keeper release"
         export CHECK_NAME="Docker keeper image"
         python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }}
-        python3 ./tests/ci/create_release.py --set-progress-completed
-    - name: Set Release progress completed
+        python3 ./create_release.py --set-progress-completed
+    - name: Set current Release progress to Completed with OK
       shell: bash
       run: |
-        # If we here - set completed status, to post proper Slack OK or FAIL message in the next step
         python3 ./tests/ci/create_release.py --set-progress-started --progress "completed"
         python3 ./tests/ci/create_release.py --set-progress-completed
     - name: Post Slack Message
diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml
index 9645d0e46de..50f4f503f5d 100644
--- a/.github/workflows/backport_branches.yml
+++ b/.github/workflows/backport_branches.yml
@@ -241,8 +241,9 @@ jobs:
       runner_type: stress-tester
       data: ${{ needs.RunConfig.outputs.data }}
   FinishCheck:
-    if: ${{ !failure() && !cancelled() }}
+    if: ${{ !cancelled() }}
     needs:
+      - RunConfig
       - Builds_Report
       - FunctionalStatelessTestAsan
       - FunctionalStatefulTestDebug
@@ -257,6 +258,7 @@
         with:
           clear-repository: true
       - name: Finish label
+        if: ${{ !failure() }}
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           # update mergeable check
@@ -264,3 +266,13 @@
           # update overall ci report
           python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
           python3 merge_pr.py
+      - name: Check Workflow results
+        run: |
+          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
+          cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
+          ${{ toJson(needs) }}
+          EOF
+          echo "::group::Workflow results"
+          python3 -m json.tool "$WORKFLOW_RESULT_FILE"
+          echo "::endgroup::"
+          python3 ./tests/ci/ci_buddy.py --check-wf-status
diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 09acef5eb8b..b28d87ee31f 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -121,34 +121,6 @@ jobs:
       runner_type: style-checker-aarch64
       data: ${{ needs.RunConfig.outputs.data }}
-  MarkReleaseReady:
-    if: ${{ !failure() && !cancelled() }}
-    needs: [RunConfig, Builds_1, Builds_2]
-    runs-on: [self-hosted, style-checker-aarch64]
-    steps:
-      - name: Debug
-        run: |
-          echo need with different filters
-          cat << 'EOF'
-          ${{ toJSON(needs) }}
-          ${{ toJSON(needs.*.result) }}
-          no failures ${{ !contains(needs.*.result, 'failure') }}
-          no skips ${{ !contains(needs.*.result, 'skipped') }}
-          no both ${{ !(contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
-          EOF
-      - name: Not ready
-        # fail the job to be able to restart it
-        if: ${{ contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure') }}
-        run: exit 1
-      - name: Check out repository code
-        if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
-        uses: ClickHouse/checkout@v1
-      - name: Mark Commit Release Ready
-        if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
-        run: |
-          cd "$GITHUB_WORKSPACE/tests/ci"
-          python3 mark_release_ready.py
-
   FinishCheck:
     if: ${{ !cancelled() }}
     needs: [RunConfig, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3]
@@ -160,3 +132,13 @@
       run: |
         cd "$GITHUB_WORKSPACE/tests/ci"
         python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
+      - name: Check Workflow results
+        run: |
+          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
+          cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
+          ${{ toJson(needs) }}
+          EOF
+          echo "::group::Workflow results"
+          python3 -m json.tool "$WORKFLOW_RESULT_FILE"
+          echo "::endgroup::"
+          python3 ./tests/ci/ci_buddy.py --check-wf-status
diff --git a/.github/workflows/merge_queue.yml b/.github/workflows/merge_queue.yml
index 31a65ac3d15..db89825a99a 100644
--- a/.github/workflows/merge_queue.yml
+++ b/.github/workflows/merge_queue.yml
@@ -93,7 +93,7 @@ jobs:
       data: ${{ needs.RunConfig.outputs.data }}
 
   CheckReadyForMerge:
-    if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }}
+    if: ${{ !cancelled() }}
     # Test_2 or Test_3 must not have jobs required for Mergeable check
     needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Tests_1]
     runs-on: [self-hosted, style-checker-aarch64]
@@ -101,6 +101,17 @@ jobs:
       - name: Check out repository code
         uses: ClickHouse/checkout@v1
       - name: Check and set merge status
+        if: ${{ needs.StyleCheck.result == 'success' }}
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
+      - name: Check Workflow results
+        run: |
+          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
+          cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
+          ${{ toJson(needs) }}
+          EOF
+          echo "::group::Workflow results"
+          python3 -m json.tool "$WORKFLOW_RESULT_FILE"
+          echo "::endgroup::"
+          python3 ./tests/ci/ci_buddy.py --check-wf-status
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
index bffe5b4c1bf..fd5b5eefcc4 100644
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -44,3 +44,20 @@ jobs:
     with:
       data: "${{ needs.RunConfig.outputs.data }}"
       set_latest: true
+  CheckWorkflow:
+    if: ${{ !cancelled() }}
+    needs: [RunConfig, BuildDockers]
+    runs-on: [self-hosted, style-checker-aarch64]
+    steps:
+      - name: Check out repository code
+        uses: ClickHouse/checkout@v1
+      - name: Check Workflow results
+        run: |
+          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
+          cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
+          ${{ toJson(needs) }}
+          EOF
+          echo "::group::Workflow results"
+          python3 -m json.tool "$WORKFLOW_RESULT_FILE"
+          echo "::endgroup::"
+          python3 ./tests/ci/ci_buddy.py --check-wf-status
diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index 5124e4dba2c..9930cf6dde4 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -151,7 +151,7 @@ jobs:
       data: ${{ needs.RunConfig.outputs.data }}
 
   CheckReadyForMerge:
-    if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }}
+    if: ${{ !cancelled() }}
     # Test_2 or Test_3 must not have jobs required for Mergeable check
     needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1]
     runs-on: [self-hosted, style-checker-aarch64]
@@ -161,9 +161,20 @@ jobs:
         with:
           filter: tree:0
       - name: Check and set merge status
+        if: ${{ needs.StyleCheck.result == 'success' }}
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
+      - name: Check Workflow results
+        run: |
+          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
+          cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
+          ${{ toJson(needs) }}
+          EOF
+          echo "::group::Workflow results"
+          python3 -m json.tool "$WORKFLOW_RESULT_FILE"
+          echo "::endgroup::"
+          python3 ./tests/ci/ci_buddy.py --check-wf-status
 
   ################################# Stage Final #################################
   #
diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml
index 6a18999d74e..50565112825 100644
--- a/.github/workflows/release_branches.yml
+++ b/.github/workflows/release_branches.yml
@@ -441,8 +441,9 @@
       runner_type: stress-tester
       data: ${{ needs.RunConfig.outputs.data }}
   FinishCheck:
-    if: ${{ !failure() && !cancelled() }}
+    if: ${{ !cancelled() }}
     needs:
+      - RunConfig
       - DockerServerImage
       - DockerKeeperImage
       - Builds_Report
@@ -478,9 +479,20 @@
         with:
           clear-repository: true
       - name: Finish label
+        if: ${{ !failure() }}
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           # update mergeable check
           python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
           # update overall ci report
           python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
+      - name: Check Workflow results
+        run: |
+          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
+          cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
+          ${{ toJson(needs) }}
+          EOF
+          echo "::group::Workflow results"
+          python3 -m json.tool "$WORKFLOW_RESULT_FILE"
+          echo "::endgroup::"
+          python3 ./tests/ci/ci_buddy.py --check-wf-status
diff --git a/.yamllint b/.yamllint
index f144e2d47b1..7fb741ec9f4 100644
--- a/.yamllint
+++ b/.yamllint
@@ -14,3 +14,9 @@ rules:
   comments:
     min-spaces-from-content: 1
   document-start: disable
+  colons: disable
+  indentation: disable
+  line-length: disable
+  trailing-spaces: disable
+  truthy: disable
+  new-line-at-end-of-file: disable
diff --git a/base/base/defines.h b/base/base/defines.h
index 2fc54c37bde..5685a6d9833 100644
--- a/base/base/defines.h
+++ b/base/base/defines.h
@@ -87,10 +87,13 @@
 #    define ASAN_POISON_MEMORY_REGION(a, b)
 #endif
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
-    #if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER) || defined(UNDEFINED_BEHAVIOR_SANITIZER)
-        #define ABORT_ON_LOGICAL_ERROR
-    #endif
+/// We used to have only the ABORT_ON_LOGICAL_ERROR macro, but most of its uses were in places that didn't actually care about logical errors
+/// and only wanted to check whether the current build is a debug or sanitizer build. The new macro is introduced to fix those places.
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
+#    if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER) \
+        || defined(UNDEFINED_BEHAVIOR_SANITIZER)
+#        define DEBUG_OR_SANITIZER_BUILD
+#    endif
 #endif
 
 /// chassert(x) is similar to assert(x), but:
@@ -101,7 +104,7 @@
 /// Also it makes sense to call abort() instead of __builtin_unreachable() in debug builds,
 /// because SIGABRT is easier to debug than SIGTRAP (the second one makes gdb crazy)
 #if !defined(chassert)
-    #if defined(ABORT_ON_LOGICAL_ERROR)
+#    if defined(DEBUG_OR_SANITIZER_BUILD)
         // clang-format off
         #include
         namespace DB
diff --git a/docs/en/operations/settings/settings-users.md b/docs/en/operations/settings/settings-users.md
index 96477f777a9..ef1e58fd18e 100644
--- a/docs/en/operations/settings/settings-users.md
+++ b/docs/en/operations/settings/settings-users.md
@@ -22,6 +22,21 @@ Structure of the `users` section:
 
+        <ssh_keys>
+            <ssh_key>
+                <type>ssh-ed25519</type>
+                <base64_key>AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj</base64_key>
+            </ssh_key>
+            <ssh_key>
+                <type>ecdsa-sha2-nistp256</type>
+                <base64_key>AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBNxeV2uN5UY6CUbCzTA1rXfYimKQA5ivNIqxdax4bcMXz4D0nSk2l5E1TkR5mG8EBWtmExSPbcEPJ8V7lyWWbA8=</base64_key>
+            </ssh_key>
+            <ssh_key>
+                <type>ssh-rsa</type>
+                <base64_key>AAAAB3NzaC1yc2EAAAADAQABAAABgQCpgqL1SHhPVBOTFlOm0pu+cYBbADzC2jL41sPMawYCJHDyHuq7t+htaVVh2fRgpAPmSEnLEC2d4BEIKMtPK3bfR8plJqVXlLt6Q8t4b1oUlnjb3VPA9P6iGcW7CV1FBkZQEVx8ckOfJ3F+kI5VsrRlEDgiecm/C1VPl0/9M2llW/mPUMaD65cM9nlZgM/hUeBrfxOEqM11gDYxEZm1aRSbZoY4dfdm3vzvpSQ6lrCrkjn3X2aSmaCLcOWJhfBWMovNDB8uiPuw54g3ioZ++qEQMlfxVsqXDGYhXCrsArOVuW/5RbReO79BvXqdssiYShfwo+GhQ0+aLWMIW/jgBkkqx/n7uKLzCMX7b2F+aebRYFh+/QXEj7SnihdVfr9ud6NN3MWzZ1ltfIczlEcFLrLJ1Yq57wW6wXtviWh59WvTWFiPejGjeSjjJyqqB49tKdFVFuBnIU5u/bch2DXVgiAEdQwUrIp1ACoYPq22HFFAYUJrL32y7RxX3PGzuAv3LOc=</base64_key>
+            </ssh_key>
+        </ssh_keys>
+
         <access_management>0|1</access_management>
@@ -79,6 +94,24 @@ Password can be specified in plaintext or in SHA256 (hex format).
 
     The first line of the result is the password. The second line is the corresponding double SHA1 hash.
 
+### username/ssh-key {#user-sshkey}
+
+This setting allows authenticating with SSH keys.
+
+Given an SSH key (as generated by `ssh-keygen`) like
+```
+ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj john@example.com
+```
+The `ssh_key` element is expected to be
+```
+<ssh_key>
+    <type>ssh-ed25519</type>
+    <base64_key>AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj</base64_key>
+</ssh_key>
+```
+
+Substitute `ssh-ed25519` with `ssh-rsa` or `ecdsa-sha2-nistp256` for the other supported algorithms.
+
 ### access_management {#access_management-user-setting}
 
 This setting enables or disables using of SQL-driven [access control and account management](../../guides/sre/user-management/index.md#access-control) for the user.
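As a quick illustration of the mapping the docs describe, here is a small standalone program (hypothetical, not part of ClickHouse) that turns an OpenSSH public-key line into the documented `<ssh_key>` element:

```cpp
#include <iostream>
#include <sstream>
#include <string>

// Splits an OpenSSH public-key line ("<type> <base64> [comment]") into the
// fields used by the <ssh_key> element. Illustrative only: ClickHouse itself
// reads the XML/YAML configuration, not key files.
int main()
{
    const std::string key_line =
        "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj john@example.com";

    std::istringstream in(key_line);
    std::string type, base64_key;
    in >> type >> base64_key;  // the trailing comment is not part of the config

    std::cout << "<ssh_key>\n"
              << "    <type>" << type << "</type>\n"
              << "    <base64_key>" << base64_key << "</base64_key>\n"
              << "</ssh_key>\n";
}
```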
diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md
index 34c6016235a..96d9d26977d 100644
--- a/docs/en/sql-reference/statements/select/join.md
+++ b/docs/en/sql-reference/statements/select/join.md
@@ -297,7 +297,7 @@ Algorithm requires the special column in tables. This column:
 
 - Must contain an ordered sequence.
 - Can be one of the following types: [Int, UInt](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal](../../../sql-reference/data-types/decimal.md).
-- Can’t be the only column in the `JOIN` clause.
+- For the `hash` join algorithm, it can’t be the only column in the `JOIN` clause.
 
 Syntax `ASOF JOIN ... ON`:
@@ -337,7 +337,8 @@ For example, consider the following tables:
 `ASOF JOIN` can take the timestamp of a user event from `table_1` and find an event in `table_2` where the timestamp is closest to the timestamp of the event from `table_1` corresponding to the closest match condition. Equal timestamp values are the closest if available. Here, the `user_id` column can be used for joining on equality and the `ev_time` column can be used for joining on the closest match. In our example, `event_1_1` can be joined with `event_2_1` and `event_1_2` can be joined with `event_2_3`, but `event_2_2` can’t be joined.
 
 :::note
-`ASOF` join is **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine.
+`ASOF JOIN` is supported only by the `hash` and `full_sorting_merge` join algorithms.
+It's **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine.
 :::
 
 ## PASTE JOIN Usage
diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp
index 4a11eae15ea..5c68bca3a6e 100644
--- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp
+++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp
@@ -68,6 +68,41 @@ QueryTreeNodePtr findEqualsFunction(const QueryTreeNodes & nodes)
     return nullptr;
 }
 
+/// Checks whether the nodes are a combination of isNull and notEquals functions over the same two arguments.
+bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs, QueryTreeNodePtr & rhs)
+{
+    QueryTreeNodePtrWithHashSet all_arguments;
+    for (const auto & node : nodes)
+    {
+        const auto * func_node = node->as<FunctionNode>();
+        if (!func_node)
+            return false;
+
+        const auto & arguments = func_node->getArguments().getNodes();
+        if (func_node->getFunctionName() == "isNull" && arguments.size() == 1)
+            all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0]));
+        else if (func_node->getFunctionName() == "notEquals" && arguments.size() == 2)
+        {
+            if (arguments[0]->isEqual(*arguments[1]))
+                return false;
+            all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0]));
+            all_arguments.insert(QueryTreeNodePtrWithHash(arguments[1]));
+        }
+        else
+            return false;
+
+        if (all_arguments.size() > 2)
+            return false;
+    }
+
+    if (all_arguments.size() != 2)
+        return false;
+
+    lhs = all_arguments.begin()->node;
+    rhs = std::next(all_arguments.begin())->node;
+    return true;
+}
+
 bool isBooleanConstant(const QueryTreeNodePtr & node, bool expected_value)
 {
     const auto * constant_node = node->as<ConstantNode>();
@@ -213,11 +248,14 @@
         else if (func_name == "and")
         {
             const auto & and_arguments = argument_function->getArguments().getNodes();
-            bool all_are_is_null = and_arguments.size() == 2 && isNodeFunction(and_arguments[0], "isNull") && isNodeFunction(and_arguments[1], "isNull");
-            if (all_are_is_null)
+
+            QueryTreeNodePtr is_null_lhs_arg;
+            QueryTreeNodePtr is_null_rhs_arg;
+            if (matchIsNullOfTwoArgs(and_arguments, is_null_lhs_arg, is_null_rhs_arg))
             {
-                is_null_argument_to_indices[getFunctionArgument(and_arguments.front(), 0)].push_back(or_operands.size() - 1);
-                is_null_argument_to_indices[getFunctionArgument(and_arguments.back(), 0)].push_back(or_operands.size() - 1);
+                is_null_argument_to_indices[is_null_lhs_arg].push_back(or_operands.size() - 1);
+                is_null_argument_to_indices[is_null_rhs_arg].push_back(or_operands.size() - 1);
+                continue;
             }
 
             /// Expression `a = b AND (a IS NOT NULL) AND true AND (b IS NOT NULL)` we can be replaced with `a = b`
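To make the matching rules of `matchIsNullOfTwoArgs` concrete, here is a self-contained toy model in which plain strings stand in for query-tree nodes (the `Conjunct` type is invented for illustration; only the acceptance rules mirror the function above):

```cpp
#include <cassert>
#include <set>
#include <string>
#include <vector>

// Toy conjunct: either isNull(x) with one argument, or notEquals(x, y) with two.
struct Conjunct
{
    std::string name;                    // "isNull" or "notEquals"
    std::vector<std::string> arguments;  // argument identifiers
};

bool matchIsNullOfTwoArgs(const std::vector<Conjunct> & conjuncts)
{
    std::set<std::string> all_arguments;
    for (const auto & c : conjuncts)
    {
        if (c.name == "isNull" && c.arguments.size() == 1)
            all_arguments.insert(c.arguments[0]);
        else if (c.name == "notEquals" && c.arguments.size() == 2)
        {
            if (c.arguments[0] == c.arguments[1])
                return false;
            all_arguments.insert(c.arguments[0]);
            all_arguments.insert(c.arguments[1]);
        }
        else
            return false;

        if (all_arguments.size() > 2)
            return false;  // a third distinct argument disqualifies the match
    }
    return all_arguments.size() == 2;
}

int main()
{
    // `a IS NULL AND b IS NULL` matches ...
    assert(matchIsNullOfTwoArgs({{"isNull", {"a"}}, {"isNull", {"b"}}}));
    // ... and so does `a IS NULL AND a != b AND b IS NULL`.
    assert(matchIsNullOfTwoArgs({{"isNull", {"a"}}, {"notEquals", {"a", "b"}}, {"isNull", {"b"}}}));
    // A third distinct argument does not.
    assert(!matchIsNullOfTwoArgs({{"isNull", {"a"}}, {"isNull", {"c"}}, {"isNull", {"b"}}}));
}
```

The running `all_arguments.size() > 2` check rejects a conjunction as soon as a third distinct argument appears, without scanning the remaining conjuncts.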
diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp
index f7919b6422c..4443f83596f 100644
--- a/src/Analyzer/QueryTreePassManager.cpp
+++ b/src/Analyzer/QueryTreePassManager.cpp
@@ -62,7 +62,7 @@ namespace ErrorCodes
 namespace
 {
 
-#if defined(ABORT_ON_LOGICAL_ERROR)
+#if defined(DEBUG_OR_SANITIZER_BUILD)
 
 /** This visitor checks if Query Tree structure is valid after each pass
   * in debug build.
@@ -183,7 +183,7 @@ void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node)
     for (size_t i = 0; i < passes_size; ++i)
     {
         passes[i]->run(query_tree_node, current_context);
-#if defined(ABORT_ON_LOGICAL_ERROR)
+#if defined(DEBUG_OR_SANITIZER_BUILD)
         ValidationChecker(passes[i]->getName()).visit(query_tree_node);
 #endif
     }
@@ -208,7 +208,7 @@ void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node, size_t up_to_pa
     for (size_t i = 0; i < up_to_pass_index; ++i)
     {
         passes[i]->run(query_tree_node, current_context);
-#if defined(ABORT_ON_LOGICAL_ERROR)
+#if defined(DEBUG_OR_SANITIZER_BUILD)
         ValidationChecker(passes[i]->getName()).visit(query_tree_node);
 #endif
     }
diff --git a/src/Backups/BackupUtils.cpp b/src/Backups/BackupUtils.cpp
index fa8ed5855dd..cd3f963b15d 100644
--- a/src/Backups/BackupUtils.cpp
+++ b/src/Backups/BackupUtils.cpp
@@ -105,7 +105,7 @@ bool compareRestoredTableDef(const IAST & restored_table_create_query, const IAS
         auto new_query = query.clone();
         adjustCreateQueryForBackup(new_query, global_context);
         ASTCreateQuery & create = typeid_cast<ASTCreateQuery &>(*new_query);
-        create.setUUID({});
+        create.resetUUIDs();
         create.if_not_exists = false;
         return new_query;
     };
diff --git a/src/Backups/RestoreCoordinationLocal.cpp b/src/Backups/RestoreCoordinationLocal.cpp
index f51d6c0c1d8..9fe22f874b4 100644
--- a/src/Backups/RestoreCoordinationLocal.cpp
+++ b/src/Backups/RestoreCoordinationLocal.cpp
@@ -1,4 +1,5 @@
 #include
+#include
 #include
 #include
 
@@ -67,7 +68,7 @@ void RestoreCoordinationLocal::generateUUIDForTable(ASTCreateQuery & create_quer
         auto it = create_query_uuids.find(query_str);
         if (it != create_query_uuids.end())
         {
-            create_query.setUUID(it->second);
+            it->second.copyToQuery(create_query);
             return true;
         }
         return false;
@@ -79,7 +80,8 @@
         return;
     }
 
-    auto new_uuids = create_query.generateRandomUUID(/* always_generate_new_uuid= */ true);
+    CreateQueryUUIDs new_uuids{create_query, /* generate_random= */ true, /* force_random= */ true};
+    new_uuids.copyToQuery(create_query);
 
     {
         std::lock_guard lock{mutex};
diff --git a/src/Backups/RestoreCoordinationLocal.h b/src/Backups/RestoreCoordinationLocal.h
index 5e51b719d63..35f93574b68 100644
--- a/src/Backups/RestoreCoordinationLocal.h
+++ b/src/Backups/RestoreCoordinationLocal.h
@@ -1,16 +1,17 @@
 #pragma once
 
 #include
-#include
+#include
+#include
 #include
 #include
 #include
 
-namespace Poco { class Logger; }
-
 namespace DB
 {
 
+class ASTCreateQuery;
+
 /// Implementation of the IRestoreCoordination interface performing coordination in memory.
 class RestoreCoordinationLocal : public IRestoreCoordination
@@ -55,7 +56,7 @@ private:
     std::set<std::pair<String, String>> acquired_tables_in_replicated_databases;
     std::unordered_set<String> acquired_data_in_replicated_tables;
-    std::unordered_map<String, ASTCreateQuery::UUIDs> create_query_uuids;
+    std::unordered_map<String, CreateQueryUUIDs> create_query_uuids;
     std::unordered_set<String> acquired_data_in_keeper_map_tables;
 
     mutable std::mutex mutex;
diff --git a/src/Backups/RestoreCoordinationRemote.cpp b/src/Backups/RestoreCoordinationRemote.cpp
index 84106737fc9..44214d00be5 100644
--- a/src/Backups/RestoreCoordinationRemote.cpp
+++ b/src/Backups/RestoreCoordinationRemote.cpp
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -269,7 +270,8 @@ bool RestoreCoordinationRemote::acquireInsertingDataForKeeperMap(const String &
 void RestoreCoordinationRemote::generateUUIDForTable(ASTCreateQuery & create_query)
 {
     String query_str = serializeAST(create_query);
-    String new_uuids_str = create_query.generateRandomUUID(/* always_generate_new_uuid= */ true).toString();
+    CreateQueryUUIDs new_uuids{create_query, /* generate_random= */ true, /* force_random= */ true};
+    String new_uuids_str = new_uuids.toString();
 
     auto holder = with_retries.createRetriesControlHolder("generateUUIDForTable");
     holder.retries_ctl.retryLoop(
@@ -281,11 +283,14 @@
             Coordination::Error res = zk->tryCreate(path, new_uuids_str, zkutil::CreateMode::Persistent);
 
             if (res == Coordination::Error::ZOK)
+            {
+                new_uuids.copyToQuery(create_query);
                 return;
+            }
 
             if (res == Coordination::Error::ZNODEEXISTS)
             {
-                create_query.setUUID(ASTCreateQuery::UUIDs::fromString(zk->get(path)));
+                CreateQueryUUIDs::fromString(zk->get(path)).copyToQuery(create_query);
                 return;
             }
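The ZooKeeper logic above implements first-writer-wins agreement: whichever replica creates the node first fixes the table UUIDs for everyone. A hedged sketch of that flow with toy stand-ins (`FakeKeeper` and this simplified `CreateQueryUUIDs` are invented for illustration; only the `toString`/`fromString`/`copyToQuery` call pattern mirrors the real interface shown in the diff):

```cpp
#include <cassert>
#include <map>
#include <string>

// Toy stand-in for the real src/Parsers CreateQueryUUIDs (which also carries
// UUIDs of inner target tables).
struct CreateQueryUUIDs
{
    std::string uuid;
    std::string toString() const { return uuid; }
    static CreateQueryUUIDs fromString(const std::string & s) { return {s}; }
    void copyToQuery(std::string & create_query_uuid) const { create_query_uuid = uuid; }
};

// Toy "ZooKeeper": tryCreate succeeds only for the first writer of a path.
struct FakeKeeper
{
    std::map<std::string, std::string> nodes;
    bool tryCreate(const std::string & path, const std::string & value) { return nodes.emplace(path, value).second; }
    std::string get(const std::string & path) const { return nodes.at(path); }
};

// First replica to create the node wins; later replicas adopt its UUIDs,
// so every replica restores the table with the same UUID.
void assignTableUUID(std::string & query_uuid, FakeKeeper & zk, const std::string & path, const std::string & random_uuid)
{
    CreateQueryUUIDs new_uuids{random_uuid};
    if (zk.tryCreate(path, new_uuids.toString()))
        new_uuids.copyToQuery(query_uuid);
    else
        CreateQueryUUIDs::fromString(zk.get(path)).copyToQuery(query_uuid);
}

int main()
{
    FakeKeeper zk;
    std::string replica_a_uuid, replica_b_uuid;
    assignTableUUID(replica_a_uuid, zk, "/restore/table_uuid", "uuid-from-replica-a");
    assignTableUUID(replica_b_uuid, zk, "/restore/table_uuid", "uuid-from-replica-b");
    assert(replica_a_uuid == replica_b_uuid);  // both replicas agree on the winner's UUID
}
```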
diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp
index 33bd1266c90..e26fe790a8e 100644
--- a/src/Columns/ColumnAggregateFunction.cpp
+++ b/src/Columns/ColumnAggregateFunction.cpp
@@ -267,7 +267,7 @@ bool ColumnAggregateFunction::structureEquals(const IColumn & to) const
 }
 
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start, size_t length)
 #else
 void ColumnAggregateFunction::doInsertRangeFrom(const IColumn & from, size_t start, size_t length)
@@ -465,7 +465,7 @@ void ColumnAggregateFunction::insertFromWithOwnership(const IColumn & from, size
     insertMergeFrom(from, n);
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n)
 #else
 void ColumnAggregateFunction::doInsertFrom(const IColumn & from, size_t n)
diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h
index 330a707b75c..b581c3ba3b4 100644
--- a/src/Columns/ColumnAggregateFunction.h
+++ b/src/Columns/ColumnAggregateFunction.h
@@ -145,7 +145,7 @@ public:
 
     void insertData(const char * pos, size_t length) override;
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertFrom(const IColumn & from, size_t n) override;
 #else
     using IColumn::insertFrom;
@@ -189,7 +189,7 @@ public:
 
     void protect() override;
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertRangeFrom(const IColumn & from, size_t start, size_t length) override;
 #else
     void doInsertRangeFrom(const IColumn & from, size_t start, size_t length) override;
@@ -212,7 +212,7 @@ public:
 
     MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     int compareAt(size_t, size_t, const IColumn &, int) const override
 #else
     int doCompareAt(size_t, size_t, const IColumn &, int) const override
diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp
index 9203fb8042f..19cce678cc7 100644
--- a/src/Columns/ColumnArray.cpp
+++ b/src/Columns/ColumnArray.cpp
@@ -336,7 +336,7 @@ bool ColumnArray::tryInsert(const Field & x)
     return true;
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnArray::insertFrom(const IColumn & src_, size_t n)
 #else
 void ColumnArray::doInsertFrom(const IColumn & src_, size_t n)
@@ -395,7 +395,7 @@ int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan
             : 1);
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
 #else
 int ColumnArray::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
@@ -542,7 +542,7 @@ void ColumnArray::getExtremes(Field & min, Field & max) const
 }
 
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t length)
 #else
 void ColumnArray::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h
index 5e01b9144d7..63affb86d9d 100644
--- a/src/Columns/ColumnArray.h
+++ b/src/Columns/ColumnArray.h
@@ -84,14 +84,14 @@ public:
     void updateHashWithValue(size_t n, SipHash & hash) const override;
     WeakHash32 getWeakHash32() const override;
     void updateHashFast(SipHash & hash) const override;
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
 #else
     void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
 #endif
     void insert(const Field & x) override;
     bool tryInsert(const Field & x) override;
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertFrom(const IColumn & src_, size_t n) override;
 #else
     void doInsertFrom(const IColumn & src_, size_t n) override;
@@ -103,7 +103,7 @@ public:
     ColumnPtr permute(const Permutation & perm, size_t limit) const override;
     ColumnPtr index(const IColumn & indexes, size_t limit) const override;
     template <typename T> ColumnPtr indexImpl(const PaddedPODArray<T> & indexes, size_t limit) const;
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
 #else
     int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h
index 19470113394..c4270e8216b 100644
--- a/src/Columns/ColumnCompressed.h
+++ b/src/Columns/ColumnCompressed.h
@@ -86,7 +86,7 @@ public:
     bool isDefaultAt(size_t) const override { throwMustBeDecompressed(); }
     void insert(const Field &) override { throwMustBeDecompressed(); }
     bool tryInsert(const Field &) override { throwMustBeDecompressed(); }
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); }
 #else
     void doInsertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); }
@@ -105,7 +105,7 @@ public:
     void expand(const Filter &, bool) override { throwMustBeDecompressed(); }
     ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeDecompressed(); }
     ColumnPtr index(const IColumn &, size_t) const override { throwMustBeDecompressed(); }
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); }
 #else
     int doCompareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); }
diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h
index 65ce53687b9..ca38e76ea57 100644
--- a/src/Columns/ColumnConst.h
+++ b/src/Columns/ColumnConst.h
@@ -123,7 +123,7 @@ public:
         return data->isNullAt(0);
     }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override
 #else
     void doInsertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override
@@ -151,7 +151,7 @@ public:
         ++s;
     }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertFrom(const IColumn &, size_t) override
 #else
     void doInsertFrom(const IColumn &, size_t) override
@@ -160,7 +160,7 @@ public:
         ++s;
     }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; }
 #else
     void doInsertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; }
@@ -237,7 +237,7 @@ public:
         return data->allocatedBytes() + sizeof(s);
     }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     int compareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override
 #else
     int doCompareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override
diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp
index e27807950ae..bb4433f8956 100644
--- a/src/Columns/ColumnDecimal.cpp
+++ b/src/Columns/ColumnDecimal.cpp
@@ -31,7 +31,7 @@ namespace ErrorCodes
 }
 
 template <typename T>
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 int ColumnDecimal<T>::compareAt(size_t n, size_t m, const IColumn & rhs_, int) const
 #else
 int ColumnDecimal<T>::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int) const
@@ -333,7 +333,7 @@ void ColumnDecimal::insertData(const char * src, size_t /*length*/)
 }
 
 template <typename T>
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnDecimal<T>::insertRangeFrom(const IColumn & src, size_t start, size_t length)
 #else
 void ColumnDecimal<T>::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h
index eb8a305a822..59bfbd2159c 100644
--- a/src/Columns/ColumnDecimal.h
+++ b/src/Columns/ColumnDecimal.h
@@ -55,13 +55,13 @@ public:
     void reserve(size_t n) override { data.reserve_exact(n); }
     void shrinkToFit() override { data.shrink_to_fit(); }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast<const Self &>(src).getData()[n]); }
 #else
     void doInsertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast<const Self &>(src).getData()[n]); }
 #endif
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertManyFrom(const IColumn & src, size_t position, size_t length) override
 #else
     void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override
@@ -76,7 +76,7 @@ public:
     void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); }
     void insert(const Field & x) override { data.push_back(x.get<T>()); }
     bool tryInsert(const Field & x) override;
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
 #else
     void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@@ -104,7 +104,7 @@ public:
     void updateHashWithValue(size_t n, SipHash & hash) const override;
     WeakHash32 getWeakHash32() const override;
     void updateHashFast(SipHash & hash) const override;
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
 #else
     int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp
index c735238f515..a92d54dd675 100644
--- a/src/Columns/ColumnDynamic.cpp
+++ b/src/Columns/ColumnDynamic.cpp
@@ -215,7 +215,7 @@ bool ColumnDynamic::tryInsert(const DB::Field & x)
 }
 
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n)
 #else
 void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n)
@@ -269,7 +269,7 @@ void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n)
     variant_col.insertIntoVariantFrom(string_variant_discr, *tmp_string_column, 0);
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size_t length)
 #else
 void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, size_t length)
@@ -439,7 +439,7 @@ void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, si
     }
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnDynamic::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length)
 #else
 void ColumnDynamic::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length)
@@ -603,7 +603,7 @@ void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const
     variant_col.getVariantByGlobalDiscriminator(discr).updateHashWithValue(variant_col.offsetAt(n), hash);
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 int ColumnDynamic::compareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const
 #else
 int ColumnDynamic::doCompareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const
diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h
index 6f09abb945a..e92cabd3db9 100644
--- a/src/Columns/ColumnDynamic.h
+++ b/src/Columns/ColumnDynamic.h
@@ -144,7 +144,7 @@ public:
     void insert(const Field & x) override;
     bool tryInsert(const Field & x) override;
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertFrom(const IColumn & src_, size_t n) override;
     void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
     void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
@@ -221,7 +221,7 @@ public:
         return scattered_columns;
     }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
 #else
     int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp
index 4d17eb0bebd..0bb3f7edb14 100644
--- a/src/Columns/ColumnFixedString.cpp
+++ b/src/Columns/ColumnFixedString.cpp
@@ -74,7 +74,7 @@ bool ColumnFixedString::tryInsert(const Field & x)
     return true;
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnFixedString::insertFrom(const IColumn & src_, size_t index)
 #else
 void ColumnFixedString::doInsertFrom(const IColumn & src_, size_t index)
@@ -90,7 +90,7 @@ void ColumnFixedString::doInsertFrom(const IColumn & src_, size_t index)
     memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[n * index], n);
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnFixedString::insertManyFrom(const IColumn & src, size_t position, size_t length)
 #else
 void ColumnFixedString::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
@@ -225,7 +225,7 @@ size_t ColumnFixedString::estimateCardinalityInPermutedRange(const Permutation &
     return elements.size();
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length)
 #else
 void ColumnFixedString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h
index 630c6c1c0a6..676ac7712ba 100644
--- a/src/Columns/ColumnFixedString.h
+++ b/src/Columns/ColumnFixedString.h
@@ -98,13 +98,13 @@ public:
 
     bool tryInsert(const Field & x) override;
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertFrom(const IColumn & src_, size_t index) override;
 #else
     void doInsertFrom(const IColumn & src_, size_t index) override;
 #endif
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
 #else
     void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override;
@@ -137,7 +137,7 @@ public:
 
     void updateHashFast(SipHash & hash) const override;
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override
 #else
     int doCompareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override
@@ -156,7 +156,7 @@ public:
 
     size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override;
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
 #else
     void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
diff --git a/src/Columns/ColumnFunction.cpp b/src/Columns/ColumnFunction.cpp
index fa57f35a823..fc81efaac0c 100644
--- a/src/Columns/ColumnFunction.cpp
+++ b/src/Columns/ColumnFunction.cpp
@@ -72,7 +72,7 @@ ColumnPtr ColumnFunction::cut(size_t start, size_t length) const
     return ColumnFunction::create(length, function, capture, is_short_circuit_argument, is_function_compiled);
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnFunction::insertFrom(const IColumn & src, size_t n)
 #else
 void ColumnFunction::doInsertFrom(const IColumn & src, size_t n)
@@ -93,7 +93,7 @@ void ColumnFunction::doInsertFrom(const IColumn & src, size_t n)
     ++elements_size;
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnFunction::insertRangeFrom(const IColumn & src, size_t start, size_t length)
 #else
 void ColumnFunction::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h
index dcd67aecad7..b62c6bf70eb 100644
--- a/src/Columns/ColumnFunction.h
+++ b/src/Columns/ColumnFunction.h
@@ -95,12 +95,12 @@ public:
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert into {}", getName());
     }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertFrom(const IColumn & src, size_t n) override;
 #else
     void doInsertFrom(const IColumn & src, size_t n) override;
 #endif
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertRangeFrom(const IColumn &, size_t start, size_t length) override;
 #else
     void doInsertRangeFrom(const IColumn &, size_t start, size_t length) override;
@@ -146,7 +146,7 @@ public:
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "popBack is not implemented for {}", getName());
     }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     int compareAt(size_t, size_t, const IColumn &, int) const override
 #else
     int doCompareAt(size_t, size_t, const IColumn &, int) const override
diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp
index 49ce948bf78..a977046b07f 100644
--- a/src/Columns/ColumnLowCardinality.cpp
+++ b/src/Columns/ColumnLowCardinality.cpp
@@ -158,7 +158,7 @@ void ColumnLowCardinality::insertDefault()
     idx.insertPosition(getDictionary().getDefaultValueIndex());
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n)
 #else
 void ColumnLowCardinality::doInsertFrom(const IColumn & src, size_t n)
@@ -190,7 +190,7 @@ void ColumnLowCardinality::insertFromFullColumn(const IColumn & src, size_t n)
     idx.insertPosition(getDictionary().uniqueInsertFrom(src, n));
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length)
 #else
 void ColumnLowCardinality::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
@@ -362,7 +362,7 @@ int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs,
     return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint);
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
 #else
 int ColumnLowCardinality::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h
index fb0c1237fcf..3766b247d60 100644
--- a/src/Columns/ColumnLowCardinality.h
+++ b/src/Columns/ColumnLowCardinality.h
@@ -78,14 +78,14 @@ public:
     bool tryInsert(const Field & x) override;
     void insertDefault() override;
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertFrom(const IColumn & src, size_t n) override;
 #else
     void doInsertFrom(const IColumn & src, size_t n) override;
 #endif
     void insertFromFullColumn(const IColumn & src, size_t n);
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
 #else
     void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@@ -135,7 +135,7 @@ public:
         return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit));
     }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
 #else
     int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp
index 08d7734ac6b..1025b4e77b9 100644
--- a/src/Columns/ColumnMap.cpp
+++ b/src/Columns/ColumnMap.cpp
@@ -153,7 +153,7 @@ void ColumnMap::updateHashFast(SipHash & hash) const
     nested->updateHashFast(hash);
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnMap::insertFrom(const IColumn & src, size_t n)
 #else
 void ColumnMap::doInsertFrom(const IColumn & src, size_t n)
@@ -162,7 +162,7 @@ void ColumnMap::doInsertFrom(const IColumn & src, size_t n)
     nested->insertFrom(assert_cast<const ColumnMap &>(src).getNestedColumn(), n);
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnMap::insertManyFrom(const IColumn & src, size_t position, size_t length)
 #else
 void ColumnMap::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
@@ -171,7 +171,7 @@ void ColumnMap::doInsertManyFrom(const IColumn & src, size_t position, size_t le
     assert_cast<ColumnArray &>(*nested).insertManyFrom(assert_cast<const ColumnMap &>(src).getNestedColumn(), position, length);
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length)
 #else
 void ColumnMap::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
@@ -222,7 +222,7 @@ MutableColumns ColumnMap::scatter(ColumnIndex num_columns, const Selector & sele
     return res;
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 int ColumnMap::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
 #else
 int ColumnMap::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h
index 29275e1b5f7..3eaaa0ad562 100644
--- a/src/Columns/ColumnMap.h
+++ b/src/Columns/ColumnMap.h
@@ -67,7 +67,7 @@ public:
     WeakHash32 getWeakHash32() const override;
     void updateHashFast(SipHash & hash) const override;
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertFrom(const IColumn & src_, size_t n) override;
     void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
     void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@@ -83,7 +83,7 @@ public:
     ColumnPtr index(const IColumn & indexes, size_t limit) const override;
     ColumnPtr replicate(const Offsets & offsets) const override;
     MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
 #else
     int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp
index 867c9149242..6529f0b78db 100644
--- a/src/Columns/ColumnNullable.cpp
+++ b/src/Columns/ColumnNullable.cpp
@@ -217,7 +217,7 @@ const char * ColumnNullable::skipSerializedInArena(const char * pos) const
     return pos;
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnNullable::insertRangeFrom(const IColumn & src, size_t start, size_t length)
 #else
 void ColumnNullable::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
@@ -258,7 +258,7 @@ bool ColumnNullable::tryInsert(const Field & x)
     return true;
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnNullable::insertFrom(const IColumn & src, size_t n)
 #else
 void ColumnNullable::doInsertFrom(const IColumn & src, size_t n)
@@ -270,7 +270,7 @@ void ColumnNullable::doInsertFrom(const IColumn & src, size_t n)
 }
 
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnNullable::insertManyFrom(const IColumn & src, size_t position, size_t length)
 #else
 void ColumnNullable::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
@@ -410,7 +410,7 @@ int ColumnNullable::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int
     return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint);
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
 #else
 int ColumnNullable::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h
index 15bbd8c3b57..fe9f5b6dcc2 100644
--- a/src/Columns/ColumnNullable.h
+++ b/src/Columns/ColumnNullable.h
@@ -69,7 +69,7 @@ public:
     char * serializeValueIntoMemory(size_t n, char * memory) const override;
     const char * deserializeAndInsertFromArena(const char * pos) override;
     const char * skipSerializedInArena(const char * pos) const override;
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
 #else
     void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@@ -77,7 +77,7 @@ public:
     void insert(const Field & x) override;
     bool tryInsert(const Field & x) override;
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertFrom(const IColumn & src, size_t n) override;
     void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
 #else
@@ -100,7 +100,7 @@ public:
     void expand(const Filter & mask, bool inverted) override;
     ColumnPtr permute(const Permutation & perm, size_t limit) const override;
     ColumnPtr index(const IColumn & indexes, size_t limit) const override;
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
 #else
     int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp
index 9c9dade3dd8..a6431007cb6 100644
--- a/src/Columns/ColumnObject.cpp
+++ b/src/Columns/ColumnObject.cpp
@@ -763,7 +763,7 @@ void ColumnObject::get(size_t n, Field & res) const
     }
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnObject::insertFrom(const IColumn & src, size_t n)
 #else
 void ColumnObject::doInsertFrom(const IColumn & src, size_t n)
@@ -772,7 +772,7 @@ void ColumnObject::doInsertFrom(const IColumn & src, size_t n)
     insert(src[n]);
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length)
 #else
 void ColumnObject::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h
index 5303adf0881..25cfaa550f6 100644
--- a/src/Columns/ColumnObject.h
+++ b/src/Columns/ColumnObject.h
@@ -211,7 +211,7 @@ public:
     bool tryInsert(const Field & field) override;
     void insertDefault() override;
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertFrom(const IColumn & src, size_t n) override;
     void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
 #else
@@ -236,7 +236,7 @@ public:
     /// Order of rows in ColumnObject is undefined.
     void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override;
     void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {}
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
 #else
     int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp
index 8f98a4433d3..a908d970a15 100644
--- a/src/Columns/ColumnSparse.cpp
+++ b/src/Columns/ColumnSparse.cpp
@@ -174,7 +174,7 @@ const char * ColumnSparse::skipSerializedInArena(const char * pos) const
     return values->skipSerializedInArena(pos);
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnSparse::insertRangeFrom(const IColumn & src, size_t start, size_t length)
 #else
 void ColumnSparse::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
@@ -252,7 +252,7 @@ bool ColumnSparse::tryInsert(const Field & x)
     return true;
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnSparse::insertFrom(const IColumn & src, size_t n)
 #else
 void ColumnSparse::doInsertFrom(const IColumn & src, size_t n)
@@ -454,7 +454,7 @@ ColumnPtr ColumnSparse::indexImpl(const PaddedPODArray & indexes, size_t l
     return ColumnSparse::create(std::move(res_values), std::move(res_offsets), limit);
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 int ColumnSparse::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
 #else
 int ColumnSparse::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h
index 392a6910956..7a4d914e62a 100644
--- a/src/Columns/ColumnSparse.h
+++ b/src/Columns/ColumnSparse.h
@@ -81,14 +81,14 @@ public:
     char * serializeValueIntoMemory(size_t n, char * memory) const override;
     const char * deserializeAndInsertFromArena(const char * pos) override;
     const char * skipSerializedInArena(const char *) const override;
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
 #else
     void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
 #endif
     void insert(const Field & x) override;
     bool tryInsert(const Field & x) override;
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertFrom(const IColumn & src, size_t n) override;
 #else
     void doInsertFrom(const IColumn & src, size_t n) override;
@@ -106,7 +106,7 @@ public:
     template <typename Type>
     ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
 #else
     int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp
index 4accfbe8f41..7cfa2571f5a 100644
--- a/src/Columns/ColumnString.cpp
+++ b/src/Columns/ColumnString.cpp
@@ -39,7 +39,7 @@ ColumnString::ColumnString(const ColumnString & src)
             last_offset, chars.size());
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnString::insertManyFrom(const IColumn & src, size_t position, size_t length)
 #else
 void ColumnString::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
@@ -132,7 +132,7 @@ WeakHash32 ColumnString::getWeakHash32() const
 }
 
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t length)
 #else
 void ColumnString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h
index faaaa8848ca..c1012e1e55e 100644
--- a/src/Columns/ColumnString.h
+++ b/src/Columns/ColumnString.h
@@ -142,7 +142,7 @@ public:
         return true;
     }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertFrom(const IColumn & src_, size_t n) override
 #else
     void doInsertFrom(const IColumn & src_, size_t n) override
@@ -169,7 +169,7 @@ public:
         }
     }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
 #else
     void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override;
@@ -220,7 +220,7 @@ public:
         hash.update(reinterpret_cast<const char *>(chars.data()), chars.size() * sizeof(chars[0]));
     }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
 #else
     void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@@ -250,7 +250,7 @@ public:
         offsets.push_back(offsets.back() + 1);
     }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     int compareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override
 #else
     int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override
diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp
index 6a0d85cba69..4fc3f88a87c 100644
--- a/src/Columns/ColumnTuple.cpp
+++ b/src/Columns/ColumnTuple.cpp
@@ -206,7 +206,7 @@ bool ColumnTuple::tryInsert(const Field & x)
     return true;
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnTuple::insertFrom(const IColumn & src_, size_t n)
 #else
 void ColumnTuple::doInsertFrom(const IColumn & src_, size_t n)
@@ -223,7 +223,7 @@
         columns[i]->insertFrom(*src.columns[i], n);
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnTuple::insertManyFrom(const IColumn & src, size_t position, size_t length)
 #else
 void ColumnTuple::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
@@ -327,7 +327,7 @@ void ColumnTuple::updateHashFast(SipHash & hash) const
         column->updateHashFast(hash);
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t length)
 #else
 void ColumnTuple::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
@@ -483,7 +483,7 @@ int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_
     return 0;
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
 #else
 int ColumnTuple::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h
index 2fafd93f776..16b47a993f6 100644
--- a/src/Columns/ColumnTuple.h
+++ b/src/Columns/ColumnTuple.h
@@ -66,7 +66,7 @@ public:
     void insert(const Field & x) override;
     bool tryInsert(const Field & x) override;
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertFrom(const IColumn & src_, size_t n) override;
     void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
 #else
@@ -83,7 +83,7 @@ public:
     void updateHashWithValue(size_t n, SipHash & hash) const override;
     WeakHash32 getWeakHash32() const override;
     void updateHashFast(SipHash & hash) const override;
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
 #else
     void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@@ -94,7 +94,7 @@ public:
     ColumnPtr index(const IColumn & indexes, size_t limit) const override;
     ColumnPtr replicate(const Offsets & offsets) const override;
     MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
 #else
     int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h
index ec1f8e0a4d5..d6cb75679be 100644
--- a/src/Columns/ColumnUnique.h
+++ b/src/Columns/ColumnUnique.h
@@ -90,7 +90,7 @@ public:
         return getNestedColumn()->updateHashWithValue(n, hash_func);
     }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
 #else
     int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
@@ -492,7 +492,7 @@ const char * ColumnUnique::skipSerializedInArena(const char *) const
 }
 
 template <typename ColumnType>
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 int ColumnUnique<ColumnType>::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
 #else
 int ColumnUnique<ColumnType>::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp
index 8fd6e1bbac1..de7efb41d19 100644
--- a/src/Columns/ColumnVariant.cpp
+++ b/src/Columns/ColumnVariant.cpp
@@ -595,7 +595,7 @@ void ColumnVariant::insertManyFromImpl(const DB::IColumn & src_, size_t position
     }
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnVariant::insertFrom(const IColumn & src_, size_t n)
 #else
 void ColumnVariant::doInsertFrom(const IColumn & src_, size_t n)
@@ -604,7 +604,7 @@ void ColumnVariant::doInsertFrom(const IColumn & src_, size_t n)
     insertFromImpl(src_, n, nullptr);
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length)
 #else
 void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t length)
@@ -613,7 +613,7 @@ void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t
     insertRangeFromImpl(src_, start, length, nullptr);
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length)
 #else
 void ColumnVariant::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length)
@@ -1175,7 +1175,7 @@ bool ColumnVariant::hasEqualValues() const
     return local_discriminators->hasEqualValues() && variants[localDiscriminatorAt(0)]->hasEqualValues();
 }
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 int ColumnVariant::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
 #else
 int ColumnVariant::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h
index 94f3066e676..34c24b5428d 100644
--- a/src/Columns/ColumnVariant.h
+++ b/src/Columns/ColumnVariant.h
@@ -180,7 +180,7 @@ public:
     void insert(const Field & x) override;
     bool tryInsert(const Field & x) override;
 
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     void insertFrom(const IColumn & src_, size_t n) override;
     void insertRangeFrom(const IColumn & src_, size_t start, size_t length) override;
     void insertManyFrom(const IColumn & src_, size_t position, size_t length) override;
@@ -223,7 +223,7 @@ public:
     ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const;
     ColumnPtr replicate(const Offsets & replicate_offsets) const override;
     MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
     int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
 #else
     int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp
index 185a1e0f615..c474efe35bd 100644
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@@ -502,7 +502,7 @@ bool ColumnVector::tryInsert(const DB::Field & x)
 }
 
 template <typename T>
-#if !defined(ABORT_ON_LOGICAL_ERROR)
+#if !defined(DEBUG_OR_SANITIZER_BUILD)
 void ColumnVector<T>::insertRangeFrom(const IColumn & src, size_t start, size_t length)
 #else
 void ColumnVector<T>::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h
index c01778ecf32..2fe5b635bd2 100644
--- a/src/Columns/ColumnVector.h
+++ b/src/Columns/ColumnVector.h
@@ -64,7 +64,7 @@ public:
         return data.size();
     }
 
-#if
!defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override #else void doInsertFrom(const IColumn & src, size_t n) override @@ -73,7 +73,7 @@ public: data.push_back(assert_cast(src).getData()[n]); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertManyFrom(const IColumn & src, size_t position, size_t length) override #else void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override @@ -150,7 +150,7 @@ public: } /// This method implemented in header because it could be possibly devirtualized. -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override #else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override @@ -240,7 +240,7 @@ public: bool tryInsert(const DB::Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; diff --git a/src/Columns/IColumn.cpp b/src/Columns/IColumn.cpp index 552e52cf51c..a189903b617 100644 --- a/src/Columns/IColumn.cpp +++ b/src/Columns/IColumn.cpp @@ -46,7 +46,7 @@ String IColumn::dumpStructure() const return res.str(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void IColumn::insertFrom(const IColumn & src, size_t n) #else void IColumn::doInsertFrom(const IColumn & src, size_t n) diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 3798d3b7466..f9c1a3e7034 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -179,7 +179,7 @@ public: /// Appends n-th element from other column with the same type. /// Is used in merge-sort and merges. It could be implemented in inherited classes more optimally than default implementation. -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) virtual void insertFrom(const IColumn & src, size_t n); #else void insertFrom(const IColumn & src, size_t n) @@ -191,7 +191,7 @@ public: /// Appends range of elements from other column with the same type. /// Could be used to concatenate columns. -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) virtual void insertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; #else void insertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -202,7 +202,7 @@ public: #endif /// Appends one element from other column with the same type multiple times. -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) virtual void insertManyFrom(const IColumn & src, size_t position, size_t length) { for (size_t i = 0; i < length; ++i) @@ -345,7 +345,7 @@ public: * * For non Nullable and non floating point types, nan_direction_hint is ignored. 
*/ -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) [[nodiscard]] virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; #else [[nodiscard]] int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const @@ -667,7 +667,7 @@ protected: Sort full_sort, PartialSort partial_sort) const; -#if defined(ABORT_ON_LOGICAL_ERROR) +#if defined(DEBUG_OR_SANITIZER_BUILD) virtual void doInsertFrom(const IColumn & src, size_t n); virtual void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index b18f4fdb302..40d410e207d 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -27,7 +27,7 @@ public: size_t byteSize() const override { return 0; } size_t byteSizeAt(size_t) const override { return 0; } size_t allocatedBytes() const override { return 0; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } #else int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } @@ -73,7 +73,7 @@ public: { } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn &, size_t) override #else void doInsertFrom(const IColumn &, size_t) override @@ -82,7 +82,7 @@ public: ++s; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override #else void doInsertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override diff --git a/src/Columns/IColumnUnique.h b/src/Columns/IColumnUnique.h index 1b86204f5b1..a8e10e5e2b2 100644 --- a/src/Columns/IColumnUnique.h +++ b/src/Columns/IColumnUnique.h @@ -86,7 +86,7 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryInsert is not supported for ColumnUnique."); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn &, size_t, size_t) override #else void doInsertRangeFrom(const IColumn &, size_t, size_t) override diff --git a/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp b/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp index 645f6ed79f3..240099f0ae5 100644 --- a/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp +++ b/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp @@ -52,7 +52,7 @@ static ColumnPtr mockColumn(const DataTypePtr & type, size_t rows) } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) static NO_INLINE void insertManyFrom(IColumn & dst, const IColumn & src) #else static NO_INLINE void doInsertManyFrom(IColumn & dst, const IColumn & src) diff --git a/src/Common/Config/AbstractConfigurationComparison.cpp b/src/Common/Config/AbstractConfigurationComparison.cpp index 607b583cf31..80c837ed43b 100644 --- a/src/Common/Config/AbstractConfigurationComparison.cpp +++ b/src/Common/Config/AbstractConfigurationComparison.cpp @@ -38,7 +38,7 @@ namespace std::erase_if(left_subkeys, [&](const String & key) { return ignore_keys->contains(key); }); std::erase_if(right_subkeys, [&](const String & key) { return ignore_keys->contains(key); }); -#if defined(ABORT_ON_LOGICAL_ERROR) +#if defined(DEBUG_OR_SANITIZER_BUILD) /// Compound `ignore_keys` are not yet implemented. 
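The gated loop that follows fires only in debug and sanitizer builds. A self-contained approximation, with a plain assert standing in for ClickHouse's chassert (which likewise compiles to a no-op in release builds):

    #include <cassert>
    #include <string_view>
    #include <vector>

    /// Approximation of the gated check: compound keys such as "a.b" are not
    /// supported by the comparison, so debug builds assert they never appear.
    void assertNoCompoundIgnoreKeys(const std::vector<std::string_view> & ignore_keys)
    {
    #if defined(DEBUG_OR_SANITIZER_BUILD)
        for (std::string_view ignore_key : ignore_keys)
            assert(ignore_key.find('.') == std::string_view::npos);
    #else
        (void)ignore_keys;  /// check compiled out in release builds
    #endif
    }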
        for (const auto & ignore_key : *ignore_keys)
            chassert(ignore_key.find('.') == std::string_view::npos);
diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp
index 7c97e73f278..1011ab12d15 100644
--- a/src/Common/CurrentMetrics.cpp
+++ b/src/Common/CurrentMetrics.cpp
@@ -234,10 +234,10 @@
     M(PartsCommitted, "Deprecated. See PartsActive.") \
     M(PartsPreActive, "The part is in data_parts, but not used for SELECTs.") \
     M(PartsActive, "Active data part, used by current and upcoming SELECTs.") \
-    M(AttachedDatabase, "Active database, used by current and upcoming SELECTs.") \
-    M(AttachedTable, "Active table, used by current and upcoming SELECTs.") \
-    M(AttachedView, "Active view, used by current and upcoming SELECTs.") \
-    M(AttachedDictionary, "Active dictionary, used by current and upcoming SELECTs.") \
+    M(AttachedDatabase, "Active databases.") \
+    M(AttachedTable, "Active tables.") \
+    M(AttachedView, "Active views.") \
+    M(AttachedDictionary, "Active dictionaries.") \
     M(PartsOutdated, "Not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes.") \
     M(PartsDeleting, "Not active data part with identity refcounter, it is deleting right now by a cleaner.") \
     M(PartsDeleteOnDestroy, "Part was moved to another disk and should be deleted in own destructor.") \
diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp
index 111280074dd..33befa64946 100644
--- a/src/Common/Exception.cpp
+++ b/src/Common/Exception.cpp
@@ -64,7 +64,7 @@ void handle_error_code(const std::string & msg, int code, bool remote, const Exc
 {
     // In debug builds and builds with sanitizers, treat LOGICAL_ERROR as an assertion failure.
     // Log the message before we fail.
-#ifdef ABORT_ON_LOGICAL_ERROR
+#ifdef DEBUG_OR_SANITIZER_BUILD
     if (code == ErrorCodes::LOGICAL_ERROR)
     {
         abortOnFailedAssertion(msg, trace.data(), 0, trace.size());
@@ -443,7 +443,7 @@ PreformattedMessage getCurrentExceptionMessageAndPattern(bool with_stacktrace, b
     }
     catch (...) {} // NOLINT(bugprone-empty-catch)

-#ifdef ABORT_ON_LOGICAL_ERROR
+#ifdef DEBUG_OR_SANITIZER_BUILD
     try
     {
         throw;
diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp
index 0ffae89ffa6..f90cc087a17 100644
--- a/src/Common/MemoryTracker.cpp
+++ b/src/Common/MemoryTracker.cpp
@@ -184,7 +184,7 @@ void MemoryTracker::debugLogBigAllocationWithoutCheck(Int64 size [[maybe_unused]
 {
     /// Big allocations through allocNoThrow (without checking memory limits) may easily lead to OOM (and it's hard to debug).
     /// Let's find them.
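A few hunks up, Exception.cpp carries the policy that gives DEBUG_OR_SANITIZER_BUILD its teeth: a LOGICAL_ERROR in a debug or sanitizer build is a failed assertion, not a catchable error. A schematic, self-contained version of that policy (the error code value and the output are illustrative, not ClickHouse's definitions):

    #include <cstdlib>
    #include <iostream>
    #include <string>

    constexpr int LOGICAL_ERROR = 49;  /// illustrative value, not ClickHouse's

    [[noreturn]] void abortOnFailedAssertion(const std::string & msg)
    {
        std::cerr << "Logical error: " << msg << '\n';
        std::abort();  /// debug/sanitizer builds die here, leaving a usable report
    }

    void handleErrorCode(const std::string & msg, int code)
    {
    #ifdef DEBUG_OR_SANITIZER_BUILD
        if (code == LOGICAL_ERROR)
            abortOnFailedAssertion(msg);  /// treated as a broken invariant
    #endif
        (void)msg;
        (void)code;  /// release builds: account the error, let the exception propagate
    }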
-#ifdef ABORT_ON_LOGICAL_ERROR
+#ifdef DEBUG_OR_SANITIZER_BUILD
     if (size < 0)
         return;
diff --git a/src/Common/PageCache.cpp b/src/Common/PageCache.cpp
index 56bd8c1a339..d719a387e14 100644
--- a/src/Common/PageCache.cpp
+++ b/src/Common/PageCache.cpp
@@ -424,7 +424,7 @@ static void logUnexpectedSyscallError(std::string name)
 {
     std::string message = fmt::format("{} failed: {}", name, errnoToString());
     LOG_WARNING(&Poco::Logger::get("PageCache"), "{}", message);
-#if defined(ABORT_ON_LOGICAL_ERROR)
+#if defined(DEBUG_OR_SANITIZER_BUILD)
     volatile bool true_ = true;
     if (true_) // suppress warning about missing [[noreturn]]
         abortOnFailedAssertion(message);
diff --git a/src/Common/assert_cast.h b/src/Common/assert_cast.h
index f9d0bf0e595..7a04372ffad 100644
--- a/src/Common/assert_cast.h
+++ b/src/Common/assert_cast.h
@@ -25,7 +25,7 @@ namespace DB
 template <typename To, typename From>
 inline To assert_cast(From && from)
 {
-#ifdef ABORT_ON_LOGICAL_ERROR
+#ifdef DEBUG_OR_SANITIZER_BUILD
     try
     {
         if constexpr (std::is_pointer_v<To>)
diff --git a/src/Common/getRandomASCIIString.cpp b/src/Common/getRandomASCIIString.cpp
index 594b4cd3228..a295277b453 100644
--- a/src/Common/getRandomASCIIString.cpp
+++ b/src/Common/getRandomASCIIString.cpp
@@ -6,12 +6,17 @@ namespace DB
 {

 String getRandomASCIIString(size_t length)
+{
+    return getRandomASCIIString(length, thread_local_rng);
+}
+
+String getRandomASCIIString(size_t length, pcg64 & rng)
 {
     std::uniform_int_distribution<int> distribution('a', 'z');
     String res;
     res.resize(length);
     for (auto & c : res)
-        c = distribution(thread_local_rng);
+        c = distribution(rng);

     return res;
 }
diff --git a/src/Common/getRandomASCIIString.h b/src/Common/getRandomASCIIString.h
index 627d2700ce3..19e1ff7120e 100644
--- a/src/Common/getRandomASCIIString.h
+++ b/src/Common/getRandomASCIIString.h
@@ -2,11 +2,14 @@

 #include <Core/Types.h>

+#include <pcg_random.hpp>
+
 namespace DB
 {
 /// Slow random string. Useful for random names and things like this. Not for generating data.
 String getRandomASCIIString(size_t length);
+String getRandomASCIIString(size_t length, pcg64 & rng);

 }
diff --git a/src/Common/tests/gtest_rw_lock.cpp b/src/Common/tests/gtest_rw_lock.cpp
index d8c6e9cb99d..9b0c9aeafbe 100644
--- a/src/Common/tests/gtest_rw_lock.cpp
+++ b/src/Common/tests/gtest_rw_lock.cpp
@@ -166,7 +166,7 @@ TEST(Common, RWLockRecursive)

         auto lock2 = fifo_lock->getLock(RWLockImpl::Read, "q2");

-#ifndef ABORT_ON_LOGICAL_ERROR
+#ifndef DEBUG_OR_SANITIZER_BUILD
         /// It throws LOGICAL_ERROR
         EXPECT_ANY_THROW({fifo_lock->getLock(RWLockImpl::Write, "q2");});
 #endif
diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp
index d81dc7a76d8..d149b49d465 100644
--- a/src/Databases/DDLDependencyVisitor.cpp
+++ b/src/Databases/DDLDependencyVisitor.cpp
@@ -80,13 +80,20 @@ namespace
     /// CREATE TABLE or CREATE DICTIONARY or CREATE VIEW or CREATE TEMPORARY TABLE or CREATE DATABASE query.
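The getRandomASCIIString overload added above lets callers pass their own generator, so the produced names become reproducible under a fixed seed. A usage sketch (assuming pcg_random.hpp from the bundled pcg library is available; the standalone helper below mirrors, but is not, the patched function):

    #include <pcg_random.hpp>
    #include <iostream>
    #include <random>
    #include <string>

    /// Mirror of the patched helper: slow, lowercase ASCII, caller-supplied RNG.
    std::string randomName(size_t length, pcg64 & rng)
    {
        std::uniform_int_distribution<int> distribution('a', 'z');
        std::string res(length, '\0');
        for (auto & c : res)
            c = static_cast<char>(distribution(rng));
        return res;
    }

    int main()
    {
        pcg64 rng(42);  /// fixed seed => deterministic output, handy in tests
        std::cout << randomName(8, rng) << '\n';  /// same 8 letters on every run
    }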
void visitCreateQuery(const ASTCreateQuery & create) { - QualifiedTableName to_table{create.to_table_id.database_name, create.to_table_id.table_name}; - if (!to_table.table.empty()) + if (create.targets) { - /// TO target_table (for materialized views) - if (to_table.database.empty()) - to_table.database = current_database; - dependencies.emplace(to_table); + for (const auto & target : create.targets->targets) + { + const auto & table_id = target.table_id; + if (!table_id.table_name.empty()) + { + /// TO target_table (for materialized views) + QualifiedTableName target_name{table_id.database_name, table_id.table_name}; + if (target_name.database.empty()) + target_name.database = current_database; + dependencies.emplace(target_name); + } + } } QualifiedTableName as_table{create.as_database, create.as_table}; diff --git a/src/Databases/DDLRenamingVisitor.cpp b/src/Databases/DDLRenamingVisitor.cpp index 6cd414635a0..38e100e2470 100644 --- a/src/Databases/DDLRenamingVisitor.cpp +++ b/src/Databases/DDLRenamingVisitor.cpp @@ -86,12 +86,19 @@ namespace create.as_table = as_table_new.table; } - QualifiedTableName to_table{create.to_table_id.database_name, create.to_table_id.table_name}; - if (!to_table.table.empty() && !to_table.database.empty()) + if (create.targets) { - auto to_table_new = data.renaming_map.getNewTableName(to_table); - if (to_table_new != to_table) - create.to_table_id = StorageID{to_table_new.database, to_table_new.table}; + for (auto & target : create.targets->targets) + { + auto & table_id = target.table_id; + if (!table_id.database_name.empty() && !table_id.table_name.empty()) + { + QualifiedTableName target_name{table_id.database_name, table_id.table_name}; + auto new_target_name = data.renaming_map.getNewTableName(target_name); + if (new_target_name != target_name) + table_id = StorageID{new_target_name.database, new_target_name.table}; + } + } } } diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 5017c9b25cb..ca30ee6db15 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -195,7 +195,7 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n snapshot_detached_tables.erase(table_name); } - CurrentMetrics::add(CurrentMetrics::AttachedTable, 1); + CurrentMetrics::add(CurrentMetrics::AttachedTable); } StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & table_name) @@ -221,7 +221,7 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta .metadata_path = getObjectMetadataPath(table_name), .is_permanently = false}); - CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1); + CurrentMetrics::sub(CurrentMetrics::AttachedTable); } return res; } diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 7ce2859e962..4c079ae5300 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -729,81 +729,14 @@ void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_ if (auto * create = query->as()) { - bool replicated_table = create->storage && create->storage->engine && - (startsWith(create->storage->engine->name, "Replicated") || startsWith(create->storage->engine->name, "Shared")); - if (!replicated_table || !create->storage->engine->arguments) - return; + if (create->storage) + checkTableEngine(*create, *create->storage, query_context); - ASTs & args_ref = create->storage->engine->arguments->children; - ASTs args = args_ref; - if (args.size() < 2) 
- return; - - /// It can be a constant expression. Try to evaluate it, ignore exception if we cannot. - bool has_expression_argument = args_ref[0]->as() || args_ref[1]->as(); - if (has_expression_argument) + if (create->targets) { - try - { - args[0] = evaluateConstantExpressionAsLiteral(args_ref[0]->clone(), query_context); - args[1] = evaluateConstantExpressionAsLiteral(args_ref[1]->clone(), query_context); - } - catch (...) // NOLINT(bugprone-empty-catch) - { - } + for (const auto & inner_table_engine : create->targets->getInnerEngines()) + checkTableEngine(*create, *inner_table_engine, query_context); } - - ASTLiteral * arg1 = args[0]->as(); - ASTLiteral * arg2 = args[1]->as(); - if (!arg1 || !arg2 || arg1->value.getType() != Field::Types::String || arg2->value.getType() != Field::Types::String) - return; - - String maybe_path = arg1->value.get(); - String maybe_replica = arg2->value.get(); - - /// Looks like it's ReplicatedMergeTree with explicit zookeeper_path and replica_name arguments. - /// Let's ensure that some macros are used. - /// NOTE: we cannot check here that substituted values will be actually different on shards and replicas. - - Macros::MacroExpansionInfo info; - info.table_id = {getDatabaseName(), create->getTable(), create->uuid}; - info.shard = getShardName(); - info.replica = getReplicaName(); - query_context->getMacros()->expand(maybe_path, info); - bool maybe_shard_macros = info.expanded_other; - info.expanded_other = false; - query_context->getMacros()->expand(maybe_replica, info); - bool maybe_replica_macros = info.expanded_other; - bool enable_functional_tests_helper = getContext()->getConfigRef().has("_functional_tests_helper_database_replicated_replace_args_macros"); - - if (!enable_functional_tests_helper) - { - if (query_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments) - LOG_WARNING(log, "It's not recommended to explicitly specify zookeeper_path and replica_name in ReplicatedMergeTree arguments"); - else - throw Exception(ErrorCodes::INCORRECT_QUERY, - "It's not allowed to specify explicit zookeeper_path and replica_name " - "for ReplicatedMergeTree arguments in Replicated database. If you really want to " - "specify them explicitly, enable setting " - "database_replicated_allow_replicated_engine_arguments."); - } - - if (maybe_shard_macros && maybe_replica_macros) - return; - - if (enable_functional_tests_helper && !has_expression_argument) - { - if (maybe_path.empty() || maybe_path.back() != '/') - maybe_path += '/'; - args_ref[0]->as()->value = maybe_path + "auto_{shard}"; - args_ref[1]->as()->value = maybe_replica + "auto_{replica}"; - return; - } - - throw Exception(ErrorCodes::INCORRECT_QUERY, - "Explicit zookeeper_path and replica_name are specified in ReplicatedMergeTree arguments. " - "If you really want to specify it explicitly, then you should use some macros " - "to distinguish different shards and replicas"); } } @@ -827,6 +760,85 @@ void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_ } } +void DatabaseReplicated::checkTableEngine(const ASTCreateQuery & query, ASTStorage & storage, ContextPtr query_context) const +{ + bool replicated_table = storage.engine && + (startsWith(storage.engine->name, "Replicated") || startsWith(storage.engine->name, "Shared")); + if (!replicated_table || !storage.engine->arguments) + return; + + ASTs & args_ref = storage.engine->arguments->children; + ASTs args = args_ref; + if (args.size() < 2) + return; + + /// It can be a constant expression. 
Try to evaluate it, ignore exception if we cannot. + bool has_expression_argument = args_ref[0]->as() || args_ref[1]->as(); + if (has_expression_argument) + { + try + { + args[0] = evaluateConstantExpressionAsLiteral(args_ref[0]->clone(), query_context); + args[1] = evaluateConstantExpressionAsLiteral(args_ref[1]->clone(), query_context); + } + catch (...) // NOLINT(bugprone-empty-catch) + { + } + } + + ASTLiteral * arg1 = args[0]->as(); + ASTLiteral * arg2 = args[1]->as(); + if (!arg1 || !arg2 || arg1->value.getType() != Field::Types::String || arg2->value.getType() != Field::Types::String) + return; + + String maybe_path = arg1->value.get(); + String maybe_replica = arg2->value.get(); + + /// Looks like it's ReplicatedMergeTree with explicit zookeeper_path and replica_name arguments. + /// Let's ensure that some macros are used. + /// NOTE: we cannot check here that substituted values will be actually different on shards and replicas. + + Macros::MacroExpansionInfo info; + info.table_id = {getDatabaseName(), query.getTable(), query.uuid}; + info.shard = getShardName(); + info.replica = getReplicaName(); + query_context->getMacros()->expand(maybe_path, info); + bool maybe_shard_macros = info.expanded_other; + info.expanded_other = false; + query_context->getMacros()->expand(maybe_replica, info); + bool maybe_replica_macros = info.expanded_other; + bool enable_functional_tests_helper = getContext()->getConfigRef().has("_functional_tests_helper_database_replicated_replace_args_macros"); + + if (!enable_functional_tests_helper) + { + if (query_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments) + LOG_WARNING(log, "It's not recommended to explicitly specify zookeeper_path and replica_name in ReplicatedMergeTree arguments"); + else + throw Exception(ErrorCodes::INCORRECT_QUERY, + "It's not allowed to specify explicit zookeeper_path and replica_name " + "for ReplicatedMergeTree arguments in Replicated database. If you really want to " + "specify them explicitly, enable setting " + "database_replicated_allow_replicated_engine_arguments."); + } + + if (maybe_shard_macros && maybe_replica_macros) + return; + + if (enable_functional_tests_helper && !has_expression_argument) + { + if (maybe_path.empty() || maybe_path.back() != '/') + maybe_path += '/'; + args_ref[0]->as()->value = maybe_path + "auto_{shard}"; + args_ref[1]->as()->value = maybe_replica + "auto_{replica}"; + return; + } + + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Explicit zookeeper_path and replica_name are specified in ReplicatedMergeTree arguments. 
" + "If you really want to specify it explicitly, then you should use some macros " + "to distinguish different shards and replicas"); +} + BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, QueryFlags flags) { waitDatabaseStarted(); @@ -1312,11 +1324,9 @@ ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node if (create.uuid == UUIDHelpers::Nil || create.getTable() != TABLE_WITH_UUID_NAME_PLACEHOLDER || create.database) throw Exception(ErrorCodes::LOGICAL_ERROR, "Got unexpected query from {}: {}", node_name, query); - bool is_materialized_view_with_inner_table = create.is_materialized_view && create.to_table_id.empty(); - create.setDatabase(getDatabaseName()); create.setTable(unescapeForFileName(node_name)); - create.attach = is_materialized_view_with_inner_table; + create.attach = create.is_materialized_view_with_inner_table(); return ast; } diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index eab5b2ff931..8c3fa7c87f6 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -107,6 +107,7 @@ private: void fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config); void checkQueryValid(const ASTPtr & query, ContextPtr query_context) const; + void checkTableEngine(const ASTCreateQuery & query, ASTStorage & storage, ContextPtr query_context) const; void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 & max_log_ptr); diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index fe0baf30e57..6ccaf811764 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -289,8 +289,8 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n tables.erase(it); table_storage->is_detached = true; - if (table_storage->isSystemStorage() == false) - CurrentMetrics::sub(getAttachedCounterForStorage(table_storage), 1); + if (!table_storage->isSystemStorage() && database_name != DatabaseCatalog::SYSTEM_DATABASE) + CurrentMetrics::sub(getAttachedCounterForStorage(table_storage)); auto table_id = table_storage->getStorageID(); if (table_id.hasUUID()) @@ -334,8 +334,8 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c /// non-Atomic database the is_detached is set to true before RENAME. table->is_detached = false; - if (table->isSystemStorage() == false && table_id.database_name != DatabaseCatalog::SYSTEM_DATABASE) - CurrentMetrics::add(getAttachedCounterForStorage(table), 1); + if (!table->isSystemStorage() && table_id.database_name != DatabaseCatalog::SYSTEM_DATABASE) + CurrentMetrics::add(getAttachedCounterForStorage(table)); } void DatabaseWithOwnTablesBase::shutdown() diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index e9c642666d3..198f6c0ea04 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -59,7 +59,7 @@ CachedOnDiskReadBufferFromFile::CachedOnDiskReadBufferFromFile( std::optional read_until_position_, std::shared_ptr cache_log_) : ReadBufferFromFileBase(use_external_buffer_ ? 
0 : settings_.remote_fs_buffer_size, nullptr, 0, file_size_)
-#ifdef ABORT_ON_LOGICAL_ERROR
+#ifdef DEBUG_OR_SANITIZER_BUILD
     , log(getLogger(fmt::format("CachedOnDiskReadBufferFromFile({})", cache_key_)))
 #else
     , log(getLogger("CachedOnDiskReadBufferFromFile"))
@@ -452,7 +452,7 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegment & file_segme
 {
     case ReadType::CACHED:
     {
-#ifdef ABORT_ON_LOGICAL_ERROR
+#ifdef DEBUG_OR_SANITIZER_BUILD
         size_t file_size = getFileSizeFromReadBuffer(*read_buffer_for_file_segment);
         if (file_size == 0 || range.left + file_size <= file_offset_of_buffer_end)
             throw Exception(
@@ -937,7 +937,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()

     if (!result)
     {
-#ifdef ABORT_ON_LOGICAL_ERROR
+#ifdef DEBUG_OR_SANITIZER_BUILD
         if (read_type == ReadType::CACHED)
         {
             size_t cache_file_size = getFileSizeFromReadBuffer(*implementation_buffer);
diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h
index 34c59ecab08..5f745f3ccad 100644
--- a/src/Functions/DateTimeTransforms.h
+++ b/src/Functions/DateTimeTransforms.h
@@ -1954,7 +1954,10 @@ struct ToRelativeSubsecondNumImpl
             return t.value;
         if (scale > scale_multiplier)
             return t.value / (scale / scale_multiplier);
-        return t.value * (scale_multiplier / scale);
+        return static_cast<UInt64>(t.value) * static_cast<UInt64>((scale_multiplier / scale));
+        /// Casting ^^: All integers are Int64, yet if t.value is big enough the multiplication can still
+        /// overflow which is UB. This place is too low-level and generic to check if t.value is sane.
+        /// Therefore just let it overflow safely and don't bother further.
     }
     static Int64 execute(UInt32 t, const DateLUTImpl &)
     {
diff --git a/src/Functions/acosh.cpp b/src/Functions/acosh.cpp
index 5b071da9c40..2bab84c77af 100644
--- a/src/Functions/acosh.cpp
+++ b/src/Functions/acosh.cpp
@@ -5,11 +5,12 @@ namespace DB
 {
 namespace
 {
-    struct AcoshName
-    {
-        static constexpr auto name = "acosh";
-    };
-    using FunctionAcosh = FunctionMathUnary<UnaryFunctionVectorized<AcoshName, acosh>>;
+
+struct AcoshName
+{
+    static constexpr auto name = "acosh";
+};
+using FunctionAcosh = FunctionMathUnary<UnaryFunctionVectorized<AcoshName, acosh>>;

 }
diff --git a/src/Functions/addMicroseconds.cpp b/src/Functions/addMicroseconds.cpp
index 0dcd6b4452f..8c0ae06dcd0 100644
--- a/src/Functions/addMicroseconds.cpp
+++ b/src/Functions/addMicroseconds.cpp
@@ -6,6 +6,7 @@ namespace DB
 {
 using FunctionAddMicroseconds = FunctionDateOrDateTimeAddInterval<AddMicrosecondsImpl>;
+
 REGISTER_FUNCTION(AddMicroseconds)
 {
     factory.registerFunction<FunctionAddMicroseconds>();
diff --git a/src/Functions/addMilliseconds.cpp b/src/Functions/addMilliseconds.cpp
index 0e2b696d367..83e1f96ec4b 100644
--- a/src/Functions/addMilliseconds.cpp
+++ b/src/Functions/addMilliseconds.cpp
@@ -6,6 +6,7 @@ namespace DB
 {
 using FunctionAddMilliseconds = FunctionDateOrDateTimeAddInterval<AddMillisecondsImpl>;
+
 REGISTER_FUNCTION(AddMilliseconds)
 {
     factory.registerFunction<FunctionAddMilliseconds>();
diff --git a/src/Functions/addNanoseconds.cpp b/src/Functions/addNanoseconds.cpp
index 93eadc814d9..8f9a54752b9 100644
--- a/src/Functions/addNanoseconds.cpp
+++ b/src/Functions/addNanoseconds.cpp
@@ -6,6 +6,7 @@ namespace DB
 {
 using FunctionAddNanoseconds = FunctionDateOrDateTimeAddInterval<AddNanosecondsImpl>;
+
 REGISTER_FUNCTION(AddNanoseconds)
 {
     factory.registerFunction<FunctionAddNanoseconds>();
diff --git a/src/Functions/aes_encrypt_mysql.cpp b/src/Functions/aes_encrypt_mysql.cpp
index fb120151c25..33733f92b27 100644
--- a/src/Functions/aes_encrypt_mysql.cpp
+++ b/src/Functions/aes_encrypt_mysql.cpp
@@ -7,7 +7,6 @@ namespace DB
 {
-
 namespace
 {
diff --git a/src/Functions/appendTrailingCharIfAbsent.cpp b/src/Functions/appendTrailingCharIfAbsent.cpp
index a5554171aaa..0e57d5c55ce 100644
--- a/src/Functions/appendTrailingCharIfAbsent.cpp
+++ b/src/Functions/appendTrailingCharIfAbsent.cpp
@@ -57,7 +57,7 @@ private:
     bool useDefaultImplementationForConstants() const override { return true; }
     ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }

-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
     {
         const auto & column = arguments[0].column;
         const auto & column_char = arguments[1].column;
@@ -80,14 +80,13 @@ private:
             auto & dst_data = col_res->getChars();
             auto & dst_offsets = col_res->getOffsets();

-            const auto size = src_offsets.size();
-            dst_data.resize(src_data.size() + size);
-            dst_offsets.resize(size);
+            dst_data.resize(src_data.size() + input_rows_count);
+            dst_offsets.resize(input_rows_count);

             ColumnString::Offset src_offset{};
             ColumnString::Offset dst_offset{};

-            for (const auto i : collections::range(0, size))
+            for (size_t i = 0; i < input_rows_count; ++i)
             {
                 const auto src_length = src_offsets[i] - src_offset;
                 memcpySmallAllowReadWriteOverflow15(&dst_data[dst_offset], &src_data[src_offset], src_length);
diff --git a/src/Functions/ascii.cpp b/src/Functions/ascii.cpp
index 7c8158b53d4..0d50e5d203b 100644
--- a/src/Functions/ascii.cpp
+++ b/src/Functions/ascii.cpp
@@ -45,9 +45,7 @@ struct AsciiImpl
         size_t size = data.size() / n;

         for (size_t i = 0; i < size; ++i)
-        {
             res[i] = doAscii(data, i * n, n);
-        }
     }

     [[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray & /*res*/)
diff --git a/src/Functions/asinh.cpp b/src/Functions/asinh.cpp
index 6af832ae07c..b5e3626148f 100644
--- a/src/Functions/asinh.cpp
+++ b/src/Functions/asinh.cpp
@@ -5,11 +5,12 @@ namespace DB
 {
 namespace
 {
-    struct AsinhName
-    {
-        static constexpr auto name = "asinh";
-    };
-    using FunctionAsinh = FunctionMathUnary<UnaryFunctionVectorized<AsinhName, asinh>>;
+
+struct AsinhName
+{
+    static constexpr auto name = "asinh";
+};
+using FunctionAsinh = FunctionMathUnary<UnaryFunctionVectorized<AsinhName, asinh>>;

 }
diff --git a/src/Functions/atan2.cpp b/src/Functions/atan2.cpp
index 42294e11458..218f4c5406f 100644
--- a/src/Functions/atan2.cpp
+++ b/src/Functions/atan2.cpp
@@ -5,11 +5,12 @@ namespace DB
 {
 namespace
 {
-    struct Atan2Name
-    {
-        static constexpr auto name = "atan2";
-    };
-    using FunctionAtan2 = FunctionMathBinaryFloat64<BinaryFunctionVectorized<Atan2Name, atan2>>;
+
+struct Atan2Name
+{
+    static constexpr auto name = "atan2";
+};
+using FunctionAtan2 = FunctionMathBinaryFloat64<BinaryFunctionVectorized<Atan2Name, atan2>>;

 }
diff --git a/src/Functions/atanh.cpp b/src/Functions/atanh.cpp
index fab25414725..a36f5bcbcf0 100644
--- a/src/Functions/atanh.cpp
+++ b/src/Functions/atanh.cpp
@@ -5,11 +5,12 @@ namespace DB
 {
 namespace
 {
-    struct AtanhName
-    {
-        static constexpr auto name = "atanh";
-    };
-    using FunctionAtanh = FunctionMathUnary<UnaryFunctionVectorized<AtanhName, atanh>>;
+
+struct AtanhName
+{
+    static constexpr auto name = "atanh";
+};
+using FunctionAtanh = FunctionMathUnary<UnaryFunctionVectorized<AtanhName, atanh>>;

 }
diff --git a/src/Functions/base58Encode.cpp b/src/Functions/base58Encode.cpp
index cf790ebddab..3ae2fb12c5e 100644
--- a/src/Functions/base58Encode.cpp
+++ b/src/Functions/base58Encode.cpp
@@ -3,8 +3,10 @@ namespace DB
 {
+
 REGISTER_FUNCTION(Base58Encode)
 {
     factory.registerFunction<FunctionBase58Conversion<Base58Encode>>();
 }
+
 }
diff --git a/src/Functions/base64Decode.cpp b/src/Functions/base64Decode.cpp
index 4d06ac99d6f..349475af3f0 100644
--- a/src/Functions/base64Decode.cpp
+++ b/src/Functions/base64Decode.cpp
@@
-5,6 +5,7 @@ namespace DB { + REGISTER_FUNCTION(Base64Decode) { FunctionDocumentation::Description description = R"(Accepts a String and decodes it from base64, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-4). Throws an exception in case of an error. Alias: FROM_BASE64.)"; @@ -19,6 +20,7 @@ REGISTER_FUNCTION(Base64Decode) /// MySQL compatibility alias. factory.registerAlias("FROM_BASE64", "base64Decode", FunctionFactory::Case::Insensitive); } + } #endif diff --git a/src/Functions/base64Encode.cpp b/src/Functions/base64Encode.cpp index 64142995552..fe0fa642599 100644 --- a/src/Functions/base64Encode.cpp +++ b/src/Functions/base64Encode.cpp @@ -5,6 +5,7 @@ namespace DB { + REGISTER_FUNCTION(Base64Encode) { FunctionDocumentation::Description description = R"(Encodes a String as base64, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-4). Alias: TO_BASE64.)"; @@ -19,6 +20,7 @@ REGISTER_FUNCTION(Base64Encode) /// MySQL compatibility alias. factory.registerAlias("TO_BASE64", "base64Encode", FunctionFactory::Case::Insensitive); } + } #endif diff --git a/src/Functions/base64URLDecode.cpp b/src/Functions/base64URLDecode.cpp index f5766dc60bd..f256e111619 100644 --- a/src/Functions/base64URLDecode.cpp +++ b/src/Functions/base64URLDecode.cpp @@ -5,6 +5,7 @@ namespace DB { + REGISTER_FUNCTION(Base64URLDecode) { FunctionDocumentation::Description description = R"(Accepts a base64-encoded URL and decodes it from base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)"; @@ -16,6 +17,7 @@ REGISTER_FUNCTION(Base64URLDecode) factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); } + } #endif diff --git a/src/Functions/base64URLEncode.cpp b/src/Functions/base64URLEncode.cpp index 73a465a30c5..215712f7586 100644 --- a/src/Functions/base64URLEncode.cpp +++ b/src/Functions/base64URLEncode.cpp @@ -5,6 +5,7 @@ namespace DB { + REGISTER_FUNCTION(Base64URLEncode) { FunctionDocumentation::Description description = R"(Encodes an URL (String or FixedString) as base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)"; @@ -16,6 +17,7 @@ REGISTER_FUNCTION(Base64URLEncode) factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); } + } #endif diff --git a/src/Functions/byteSize.cpp b/src/Functions/byteSize.cpp index 93a3a86641a..d366a1b2e12 100644 --- a/src/Functions/byteSize.cpp +++ b/src/Functions/byteSize.cpp @@ -67,11 +67,11 @@ public: const IColumn * column = arguments[arg_num].column.get(); if (arg_num == 0) - for (size_t row_num = 0; row_num < input_rows_count; ++row_num) - vec_res[row_num] = column->byteSizeAt(row_num); + for (size_t row = 0; row < input_rows_count; ++row) + vec_res[row] = column->byteSizeAt(row); else - for (size_t row_num = 0; row_num < input_rows_count; ++row_num) - vec_res[row_num] += column->byteSizeAt(row_num); + for (size_t row = 0; row < input_rows_count; ++row) + vec_res[row] += column->byteSizeAt(row); } return result_col; diff --git a/src/Functions/byteSwap.cpp b/src/Functions/byteSwap.cpp index 6c824b851b0..2094ec4fa1a 100644 --- a/src/Functions/byteSwap.cpp +++ b/src/Functions/byteSwap.cpp @@ -10,6 +10,7 @@ extern const int NOT_IMPLEMENTED; namespace { + template requires std::is_integral_v T byteSwap(T x) diff --git a/src/Functions/caseWithExpression.cpp b/src/Functions/caseWithExpression.cpp index 
71fccc8436e..f0a620489ef 100644 --- a/src/Functions/caseWithExpression.cpp +++ b/src/Functions/caseWithExpression.cpp @@ -98,8 +98,7 @@ public: /// Execute transform. ColumnsWithTypeAndName transform_args{args.front(), src_array_col, dst_array_col, args.back()}; - return FunctionFactory::instance().get("transform", context)->build(transform_args) - ->execute(transform_args, result_type, input_rows_count); + return FunctionFactory::instance().get("transform", context)->build(transform_args)->execute(transform_args, result_type, input_rows_count); } private: diff --git a/src/Functions/convertCharset.cpp b/src/Functions/convertCharset.cpp index b3b7394acb9..d998e88e7c2 100644 --- a/src/Functions/convertCharset.cpp +++ b/src/Functions/convertCharset.cpp @@ -88,7 +88,8 @@ private: static void convert(const String & from_charset, const String & to_charset, const ColumnString::Chars & from_chars, const ColumnString::Offsets & from_offsets, - ColumnString::Chars & to_chars, ColumnString::Offsets & to_offsets) + ColumnString::Chars & to_chars, ColumnString::Offsets & to_offsets, + size_t input_rows_count) { auto converter_from = getConverter(from_charset); auto converter_to = getConverter(to_charset); @@ -96,12 +97,11 @@ private: ColumnString::Offset current_from_offset = 0; ColumnString::Offset current_to_offset = 0; - size_t size = from_offsets.size(); - to_offsets.resize(size); + to_offsets.resize(input_rows_count); PODArray uchars; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t from_string_size = from_offsets[i] - current_from_offset - 1; @@ -184,7 +184,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnWithTypeAndName & arg_from = arguments[0]; const ColumnWithTypeAndName & arg_charset_from = arguments[1]; @@ -204,7 +204,7 @@ public: if (const ColumnString * col_from = checkAndGetColumn(arg_from.column.get())) { auto col_to = ColumnString::create(); - convert(charset_from, charset_to, col_from->getChars(), col_from->getOffsets(), col_to->getChars(), col_to->getOffsets()); + convert(charset_from, charset_to, col_from->getChars(), col_from->getOffsets(), col_to->getChars(), col_to->getOffsets(), input_rows_count); return col_to; } else diff --git a/src/Functions/cosh.cpp b/src/Functions/cosh.cpp index 54b52051aab..f4302292303 100644 --- a/src/Functions/cosh.cpp +++ b/src/Functions/cosh.cpp @@ -5,11 +5,12 @@ namespace DB { namespace { - struct CoshName - { - static constexpr auto name = "cosh"; - }; - using FunctionCosh = FunctionMathUnary>; + +struct CoshName +{ + static constexpr auto name = "cosh"; +}; +using FunctionCosh = FunctionMathUnary>; } diff --git a/src/Functions/countSubstringsCaseInsensitiveUTF8.cpp b/src/Functions/countSubstringsCaseInsensitiveUTF8.cpp index 3f71bca63d2..99ae4f1927e 100644 --- a/src/Functions/countSubstringsCaseInsensitiveUTF8.cpp +++ b/src/Functions/countSubstringsCaseInsensitiveUTF8.cpp @@ -13,8 +13,7 @@ struct NameCountSubstringsCaseInsensitiveUTF8 static constexpr auto name = "countSubstringsCaseInsensitiveUTF8"; }; -using FunctionCountSubstringsCaseInsensitiveUTF8 = FunctionsStringSearch< - CountSubstringsImpl>; +using 
FunctionCountSubstringsCaseInsensitiveUTF8 = FunctionsStringSearch>; } diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index f49e8dee6b7..157068a7c8d 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -26,8 +26,6 @@ namespace DB namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_COLUMN; extern const int BAD_ARGUMENTS; } @@ -45,84 +43,82 @@ public: template void dispatchForColumns( - const IColumn & x, const IColumn & y, + const IColumn & col_x, const IColumn & col_y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { - if (const auto * x_vec_16 = checkAndGetColumn(&x)) - dispatchForSecondColumn(*x_vec_16, y, timezone_x, timezone_y, result); - else if (const auto * x_vec_32 = checkAndGetColumn(&x)) - dispatchForSecondColumn(*x_vec_32, y, timezone_x, timezone_y, result); - else if (const auto * x_vec_32_s = checkAndGetColumn(&x)) - dispatchForSecondColumn(*x_vec_32_s, y, timezone_x, timezone_y, result); - else if (const auto * x_vec_64 = checkAndGetColumn(&x)) - dispatchForSecondColumn(*x_vec_64, y, timezone_x, timezone_y, result); - else if (const auto * x_const_16 = checkAndGetColumnConst(&x)) - dispatchConstForSecondColumn(x_const_16->getValue(), y, timezone_x, timezone_y, result); - else if (const auto * x_const_32 = checkAndGetColumnConst(&x)) - dispatchConstForSecondColumn(x_const_32->getValue(), y, timezone_x, timezone_y, result); - else if (const auto * x_const_32_s = checkAndGetColumnConst(&x)) - dispatchConstForSecondColumn(x_const_32_s->getValue(), y, timezone_x, timezone_y, result); - else if (const auto * x_const_64 = checkAndGetColumnConst(&x)) - dispatchConstForSecondColumn(x_const_64->getValue>(), y, timezone_x, timezone_y, result); + if (const auto * x_vec_16 = checkAndGetColumn(&col_x)) + dispatchForSecondColumn(*x_vec_16, col_y, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * x_vec_32 = checkAndGetColumn(&col_x)) + dispatchForSecondColumn(*x_vec_32, col_y, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * x_vec_32_s = checkAndGetColumn(&col_x)) + dispatchForSecondColumn(*x_vec_32_s, col_y, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * x_vec_64 = checkAndGetColumn(&col_x)) + dispatchForSecondColumn(*x_vec_64, col_y, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * x_const_16 = checkAndGetColumnConst(&col_x)) + dispatchConstForSecondColumn(x_const_16->getValue(), col_y, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * x_const_32 = checkAndGetColumnConst(&col_x)) + dispatchConstForSecondColumn(x_const_32->getValue(), col_y, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * x_const_32_s = checkAndGetColumnConst(&col_x)) + dispatchConstForSecondColumn(x_const_32_s->getValue(), col_y, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * x_const_64 = checkAndGetColumnConst(&col_x)) + dispatchConstForSecondColumn(x_const_64->getValue>(), col_y, timezone_x, timezone_y, input_rows_count, result); else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column for first argument of function {}, must be Date, Date32, DateTime or DateTime64", - name); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for first argument of function {}, must be Date, Date32, DateTime or 
DateTime64", name); } template void dispatchForSecondColumn( - const LeftColumnType & x, const IColumn & y, + const LeftColumnType & x, const IColumn & col_y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { - if (const auto * y_vec_16 = checkAndGetColumn(&y)) - vectorVector(x, *y_vec_16, timezone_x, timezone_y, result); - else if (const auto * y_vec_32 = checkAndGetColumn(&y)) - vectorVector(x, *y_vec_32, timezone_x, timezone_y, result); - else if (const auto * y_vec_32_s = checkAndGetColumn(&y)) - vectorVector(x, *y_vec_32_s, timezone_x, timezone_y, result); - else if (const auto * y_vec_64 = checkAndGetColumn(&y)) - vectorVector(x, *y_vec_64, timezone_x, timezone_y, result); - else if (const auto * y_const_16 = checkAndGetColumnConst(&y)) - vectorConstant(x, y_const_16->getValue(), timezone_x, timezone_y, result); - else if (const auto * y_const_32 = checkAndGetColumnConst(&y)) - vectorConstant(x, y_const_32->getValue(), timezone_x, timezone_y, result); - else if (const auto * y_const_32_s = checkAndGetColumnConst(&y)) - vectorConstant(x, y_const_32_s->getValue(), timezone_x, timezone_y, result); - else if (const auto * y_const_64 = checkAndGetColumnConst(&y)) - vectorConstant(x, y_const_64->getValue>(), timezone_x, timezone_y, result); + if (const auto * y_vec_16 = checkAndGetColumn(&col_y)) + vectorVector(x, *y_vec_16, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_vec_32 = checkAndGetColumn(&col_y)) + vectorVector(x, *y_vec_32, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_vec_32_s = checkAndGetColumn(&col_y)) + vectorVector(x, *y_vec_32_s, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_vec_64 = checkAndGetColumn(&col_y)) + vectorVector(x, *y_vec_64, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_const_16 = checkAndGetColumnConst(&col_y)) + vectorConstant(x, y_const_16->getValue(), timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_const_32 = checkAndGetColumnConst(&col_y)) + vectorConstant(x, y_const_32->getValue(), timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_const_32_s = checkAndGetColumnConst(&col_y)) + vectorConstant(x, y_const_32_s->getValue(), timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_const_64 = checkAndGetColumnConst(&col_y)) + vectorConstant(x, y_const_64->getValue>(), timezone_x, timezone_y, input_rows_count, result); else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", - name); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", name); } template void dispatchConstForSecondColumn( - T1 x, const IColumn & y, + T1 x, const IColumn & col_y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { - if (const auto * y_vec_16 = checkAndGetColumn(&y)) - constantVector(x, *y_vec_16, timezone_x, timezone_y, result); - else if (const auto * y_vec_32 = checkAndGetColumn(&y)) - constantVector(x, *y_vec_32, timezone_x, timezone_y, result); - else if (const auto * y_vec_32_s = checkAndGetColumn(&y)) - constantVector(x, *y_vec_32_s, timezone_x, timezone_y, result); - else if (const auto * y_vec_64 = checkAndGetColumn(&y)) - 
constantVector(x, *y_vec_64, timezone_x, timezone_y, result); + if (const auto * y_vec_16 = checkAndGetColumn(&col_y)) + constantVector(x, *y_vec_16, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_vec_32 = checkAndGetColumn(&col_y)) + constantVector(x, *y_vec_32, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_vec_32_s = checkAndGetColumn(&col_y)) + constantVector(x, *y_vec_32_s, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_vec_64 = checkAndGetColumn(&col_y)) + constantVector(x, *y_vec_64, timezone_x, timezone_y, input_rows_count, result); else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", - name); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", name); } template void vectorVector( const LeftColumnType & x, const RightColumnType & y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { const auto & x_data = x.getData(); @@ -130,14 +126,15 @@ public: const auto transform_x = TransformDateTime64(getScale(x)); const auto transform_y = TransformDateTime64(getScale(y)); - for (size_t i = 0, size = x.size(); i < size; ++i) - result[i] = calculate(transform_x, transform_y, x_data[i], y_data[i], timezone_x, timezone_y); + for (size_t i = 0; i < input_rows_count; ++i) + result[i] = calculate(transform_x, transform_y, x_data[i], y_data[i], timezone_x, timezone_y); } template void vectorConstant( const LeftColumnType & x, T2 y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { const auto & x_data = x.getData(); @@ -145,7 +142,7 @@ public: const auto transform_y = TransformDateTime64(getScale(y)); const auto y_value = stripDecimalFieldValue(y); - for (size_t i = 0, size = x.size(); i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) result[i] = calculate(transform_x, transform_y, x_data[i], y_value, timezone_x, timezone_y); } @@ -153,6 +150,7 @@ public: void constantVector( T1 x, const RightColumnType & y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { const auto & y_data = y.getData(); @@ -160,20 +158,22 @@ public: const auto transform_y = TransformDateTime64(getScale(y)); const auto x_value = stripDecimalFieldValue(x); - for (size_t i = 0, size = y.size(); i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) result[i] = calculate(transform_x, transform_y, x_value, y_data[i], timezone_x, timezone_y); } template Int64 calculate(const TransformX & transform_x, const TransformY & transform_y, T1 x, T2 y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y) const { + auto res = static_cast(transform_y.execute(y, timezone_y)) - static_cast(transform_x.execute(x, timezone_x)); + if constexpr (is_diff) - return static_cast(transform_y.execute(y, timezone_y)) - - static_cast(transform_x.execute(x, timezone_x)); + { + return res; + } else { - auto res = static_cast(transform_y.execute(y, timezone_y)) - - static_cast(transform_x.execute(x, timezone_x)); + /// Adjust res: DateTimeComponentsWithFractionalPart a_comp; DateTimeComponentsWithFractionalPart b_comp; Int64 adjust_value; @@ -332,95 +332,73 @@ public: static constexpr auto name = is_relative ? 
"dateDiff" : "age"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override - { - return name; - } + String getName() const override { return name; } bool isVariadic() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } size_t getNumberOfArguments() const override { return 0; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 3}; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() != 3 && arguments.size() != 4) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 3 or 4", - getName(), arguments.size()); + FunctionArgumentDescriptors mandatory_args{ + {"unit", static_cast(&isString), nullptr, "String"}, + {"startdate", static_cast(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"}, + {"enddate", static_cast(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"}, + }; - if (!isString(arguments[0])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "First argument for function {} (unit) must be String", - getName()); + FunctionArgumentDescriptors optional_args{ + {"timezone", static_cast(&isString), nullptr, "String"}, + }; - if (!isDate(arguments[1]) && !isDate32(arguments[1]) && !isDateTime(arguments[1]) && !isDateTime64(arguments[1])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Second argument for function {} must be Date, Date32, DateTime or DateTime64", - getName()); - - if (!isDate(arguments[2]) && !isDate32(arguments[2]) && !isDateTime(arguments[2]) && !isDateTime64(arguments[2])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Third argument for function {} must be Date, Date32, DateTime or DateTime64", - getName() - ); - - if (arguments.size() == 4 && !isString(arguments[3])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Fourth argument for function {} (timezone) must be String", - getName()); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } - bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 3}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto * unit_column = checkAndGetColumnConst(arguments[0].column.get()); - if (!unit_column) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "First argument for function {} must be constant String", - getName()); + const auto * col_unit = checkAndGetColumnConst(arguments[0].column.get()); + if (!col_unit) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument for function {} must be constant String", getName()); - String unit = Poco::toLower(unit_column->getValue()); + String unit = Poco::toLower(col_unit->getValue()); - const IColumn & x = *arguments[1].column; - const IColumn & y = *arguments[2].column; + const IColumn & col_x = *arguments[1].column; + const IColumn & col_y = *arguments[2].column; - size_t rows = input_rows_count; - auto res = ColumnInt64::create(rows); + auto col_res = 
ColumnInt64::create(input_rows_count); const auto & timezone_x = extractTimeZoneFromFunctionArguments(arguments, 3, 1); const auto & timezone_y = extractTimeZoneFromFunctionArguments(arguments, 3, 2); if (unit == "year" || unit == "years" || unit == "yy" || unit == "yyyy") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "quarter" || unit == "quarters" || unit == "qq" || unit == "q") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "month" || unit == "months" || unit == "mm" || unit == "m") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "week" || unit == "weeks" || unit == "wk" || unit == "ww") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "day" || unit == "days" || unit == "dd" || unit == "d") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "hour" || unit == "hours" || unit == "hh" || unit == "h") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "minute" || unit == "minutes" || unit == "mi" || unit == "n") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "second" || unit == "seconds" || unit == "ss" || unit == "s") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "millisecond" || unit == "milliseconds" || unit == "ms") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "microsecond" || unit == "microseconds" || unit == "us" || unit == "u") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "nanosecond" || unit == "nanoseconds" || unit == "ns") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Function {} does not support '{}' unit", getName(), unit); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} does not support '{}' unit", getName(), unit); - return res; + return col_res; } private: DateDiffImpl impl{name}; @@ -437,50 +415,35 @@ public: static constexpr auto name = 
"timeDiff"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override - { - return name; - } - + String getName() const override { return name; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; } bool isVariadic() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } size_t getNumberOfArguments() const override { return 2; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() != 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 2", - getName(), arguments.size()); + FunctionArgumentDescriptors args{ + {"first_datetime", static_cast(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"}, + {"second_datetime", static_cast(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"}, + }; - if (!isDate(arguments[0]) && !isDate32(arguments[0]) && !isDateTime(arguments[0]) && !isDateTime64(arguments[0])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "First argument for function {} must be Date, Date32, DateTime or DateTime64", - getName()); - - if (!isDate(arguments[1]) && !isDate32(arguments[1]) && !isDateTime(arguments[1]) && !isDateTime64(arguments[1])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Second argument for function {} must be Date, Date32, DateTime or DateTime64", - getName() - ); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } - bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const IColumn & x = *arguments[0].column; - const IColumn & y = *arguments[1].column; + const IColumn & col_x = *arguments[0].column; + const IColumn & col_y = *arguments[1].column; - size_t rows = input_rows_count; - auto res = ColumnInt64::create(rows); + auto col_res = ColumnInt64::create(input_rows_count); - impl.dispatchForColumns>(x, y, DateLUT::instance(), DateLUT::instance(), res->getData()); + impl.dispatchForColumns>(col_x, col_y, DateLUT::instance(), DateLUT::instance(), input_rows_count, col_res->getData()); - return res; + return col_res; } private: DateDiffImpl impl{name}; diff --git a/src/Functions/dateName.cpp b/src/Functions/dateName.cpp index 8165ea1b8d3..846cb87f1ee 100644 --- a/src/Functions/dateName.cpp +++ b/src/Functions/dateName.cpp @@ -109,14 +109,14 @@ public: ColumnPtr executeImpl( const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, - [[maybe_unused]] size_t input_rows_count) const override + size_t input_rows_count) const override { ColumnPtr res; - if (!((res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)))) + if (!((res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) 
+ || (res = executeType(arguments, result_type, input_rows_count)))) throw Exception( ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of function {}, must be Date or DateTime.", @@ -127,7 +127,7 @@ public: } template - ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const + ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const { auto * times = checkAndGetColumn(arguments[1].column.get()); if (!times) @@ -144,7 +144,7 @@ public: String date_part = date_part_column->getValue(); const DateLUTImpl * time_zone_tmp; - if (std::is_same_v || std::is_same_v) + if constexpr (std::is_same_v || std::is_same_v) time_zone_tmp = &extractTimeZoneFromFunctionArguments(arguments, 2, 1); else time_zone_tmp = &DateLUT::instance(); @@ -175,7 +175,7 @@ public: using TimeType = DateTypeToTimeType; callOnDatePartWriter(date_part, [&](const auto & writer) { - for (size_t i = 0; i < times_data.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { if constexpr (std::is_same_v) { diff --git a/src/Functions/degrees.cpp b/src/Functions/degrees.cpp index 8646eb54d9a..94b5ce3682c 100644 --- a/src/Functions/degrees.cpp +++ b/src/Functions/degrees.cpp @@ -7,18 +7,20 @@ namespace DB { namespace { - struct DegreesName - { - static constexpr auto name = "degrees"; - }; - Float64 degrees(Float64 r) - { - Float64 degrees = r * (180 / M_PI); - return degrees; - } +struct DegreesName +{ + static constexpr auto name = "degrees"; +}; + +Float64 degrees(Float64 r) +{ + Float64 degrees = r * (180 / M_PI); + return degrees; +} + +using FunctionDegrees = FunctionMathUnary>; - using FunctionDegrees = FunctionMathUnary>; } REGISTER_FUNCTION(Degrees) diff --git a/src/Functions/filesystem.cpp b/src/Functions/filesystem.cpp index 9fbf9b0cbe7..9b168f3f088 100644 --- a/src/Functions/filesystem.cpp +++ b/src/Functions/filesystem.cpp @@ -91,7 +91,7 @@ public: auto col_res = ColumnVector::create(col_str->size()); auto & data = col_res->getData(); - for (size_t i = 0; i < col_str->size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { auto disk_name = col_str->getDataAt(i).toString(); if (auto it = disk_map.find(disk_name); it != disk_map.end()) diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index f89afd67e78..f33b7849a43 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -848,7 +848,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, [[maybe_unused]] size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { ColumnPtr res; if constexpr (support_integer == SupportInteger::Yes) @@ -862,17 +862,17 @@ public: if (!castType(arguments[0].type.get(), [&](const auto & type) { using FromDataType = std::decay_t; - if (!(res = executeType(arguments, result_type))) + if (!(res = executeType(arguments, result_type, input_rows_count))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of function {}, must be Integer, Date, Date32, DateTime or DateTime64.", arguments[0].column->getName(), getName()); return true; })) { - if (!((res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)))) + if (!((res = executeType(arguments, 
result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of function {}, must be Integer or DateTime.", arguments[0].column->getName(), getName()); @@ -881,10 +881,10 @@ public: } else { - if (!((res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)))) + if (!((res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of function {}, must be Date or DateTime.", arguments[0].column->getName(), getName()); @@ -894,7 +894,7 @@ public: } template - ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const + ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const { auto non_const_datetime = arguments[0].column->convertToFullColumnIfConst(); auto * times = checkAndGetColumn(non_const_datetime.get()); @@ -955,13 +955,11 @@ public: else time_zone = &DateLUT::instance(); - const auto & vec = times->getData(); - auto col_res = ColumnString::create(); auto & res_data = col_res->getChars(); auto & res_offsets = col_res->getOffsets(); - res_data.resize(vec.size() * (out_template_size + 1)); - res_offsets.resize(vec.size()); + res_data.resize(input_rows_count * (out_template_size + 1)); + res_offsets.resize(input_rows_count); if constexpr (format_syntax == FormatSyntax::MySQL) { @@ -990,9 +988,11 @@ public: } } + const auto & vec = times->getData(); + auto * begin = reinterpret_cast(res_data.data()); auto * pos = begin; - for (size_t i = 0; i < vec.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { if (!const_time_zone_column && arguments.size() > 2) { diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index d10b3f9a5b7..4e3f302ce36 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -75,7 +75,7 @@ public: if (const ColumnString * col_query_string = checkAndGetColumn(col_query.get())) { auto col_res = ColumnString::create(); - formatVector(col_query_string->getChars(), col_query_string->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_null_map); + formatVector(col_query_string->getChars(), col_query_string->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_null_map, input_rows_count); if (error_handling == ErrorHandling::Null) return ColumnNullable::create(std::move(col_res), std::move(col_null_map)); @@ -92,16 +92,16 @@ private: const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets, - ColumnUInt8::MutablePtr & res_null_map) const + ColumnUInt8::MutablePtr & res_null_map, + size_t input_rows_count) const { - const size_t size = offsets.size(); - res_offsets.resize(size); + res_offsets.resize(input_rows_count); res_data.resize(data.size()); size_t prev_offset = 0; size_t res_data_size = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const char * begin = reinterpret_cast(&data[prev_offset]); const 
char * end = begin + offsets[i] - prev_offset - 1; diff --git a/src/Functions/formatReadable.h b/src/Functions/formatReadable.h index 487ec9d79d0..9161ab43e28 100644 --- a/src/Functions/formatReadable.h +++ b/src/Functions/formatReadable.h @@ -55,19 +55,19 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { ColumnPtr res; - if (!((res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)))) + if (!((res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", arguments[0].column->getName(), getName()); @@ -76,7 +76,7 @@ public: private: template - ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const + ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { if (const ColumnVector * col_from = checkAndGetColumn>(arguments[0].column.get())) { @@ -85,13 +85,12 @@ private: const typename ColumnVector::Container & vec_from = col_from->getData(); ColumnString::Chars & data_to = col_to->getChars(); ColumnString::Offsets & offsets_to = col_to->getOffsets(); - size_t size = vec_from.size(); - data_to.resize(size * 2); - offsets_to.resize(size); + data_to.resize(input_rows_count * 2); + offsets_to.resize(input_rows_count); WriteBufferFromVector buf_to(data_to); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Impl::format(static_cast(vec_from[i]), buf_to); writeChar(0, buf_to); diff --git a/src/Functions/geohashDecode.cpp b/src/Functions/geohashDecode.cpp index 96ad7dacfc4..cace6c09fec 100644 --- a/src/Functions/geohashDecode.cpp +++ b/src/Functions/geohashDecode.cpp @@ -51,21 +51,19 @@ public: } template - bool tryExecute(const IColumn * encoded_column, ColumnPtr & result_column) const + bool tryExecute(const IColumn * encoded_column, ColumnPtr & result_column, size_t input_rows_count) const { const auto * encoded = checkAndGetColumn(encoded_column); if (!encoded) return false; - const size_t count = encoded->size(); - - auto latitude = ColumnFloat64::create(count); - auto longitude = ColumnFloat64::create(count); + auto latitude = ColumnFloat64::create(input_rows_count); + auto longitude = ColumnFloat64::create(input_rows_count); ColumnFloat64::Container & lon_data = longitude->getData(); ColumnFloat64::Container & lat_data = latitude->getData(); - for (size_t i = 0; i < count; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { std::string_view encoded_string = 
encoded->getDataAt(i).toView(); geohashDecode(encoded_string.data(), encoded_string.size(), &lon_data[i], &lat_data[i]); @@ -79,13 +77,13 @@ public: return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IColumn * encoded = arguments[0].column.get(); ColumnPtr res_column; - if (tryExecute(encoded, res_column) || - tryExecute(encoded, res_column)) + if (tryExecute(encoded, res_column, input_rows_count) || + tryExecute(encoded, res_column, input_rows_count)) return res_column; throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unsupported argument type:{} of argument of function {}", diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp index 034c8188b63..c49acddd81f 100644 --- a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -53,7 +53,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IColumn * longitude = arguments[0].column.get(); const IColumn * latitude = arguments[1].column.get(); @@ -65,26 +65,24 @@ public: precision = arguments[2].column; ColumnPtr res_column; - vector(longitude, latitude, precision.get(), res_column); + vector(longitude, latitude, precision.get(), res_column, input_rows_count); return res_column; } private: - void vector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result) const + void vector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result, size_t input_rows_count) const { auto col_str = ColumnString::create(); ColumnString::Chars & out_vec = col_str->getChars(); ColumnString::Offsets & out_offsets = col_str->getOffsets(); - const size_t size = lat_column->size(); - - out_offsets.resize(size); - out_vec.resize(size * (GEOHASH_MAX_TEXT_LENGTH + 1)); + out_offsets.resize(input_rows_count); + out_vec.resize(input_rows_count * (GEOHASH_MAX_TEXT_LENGTH + 1)); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const Float64 longitude_value = lon_column->getFloat64(i); const Float64 latitude_value = lat_column->getFloat64(i); diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index 0dbc9946710..68500779f93 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -138,8 +138,7 @@ namespace } } - ColumnPtr executeImpl( - const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { std::call_once(once, [&] { initialize(arguments, result_type); }); diff --git a/src/IO/tests/gtest_memory_resize.cpp b/src/IO/tests/gtest_memory_resize.cpp index d760a948075..c3b34c352b2 100644 --- a/src/IO/tests/gtest_memory_resize.cpp +++ b/src/IO/tests/gtest_memory_resize.cpp @@ -134,7 +134,7 @@ TEST(MemoryResizeTest, SmallInitAndBigResizeOverflowWhenPadding) ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1); 
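/// A stand-alone sketch of the buffer-sizing idiom applied in the formatDateTime
/// and geohashEncode hunks above: when every output row has a known maximum
/// width, the ColumnString chars/offsets buffers are pre-sized from
/// input_rows_count instead of being grown row by row. std::vector stands in
/// for ClickHouse's PODArray-backed containers; all names here are illustrative.
#include <cstddef>
#include <cstring>
#include <string>
#include <vector>

void buildFixedWidthColumn(
    size_t input_rows_count,
    const std::string & row_template,
    std::vector<char> & chars,
    std::vector<size_t> & offsets)
{
    const size_t out_template_size = row_template.size();

    /// One slot per row: the template plus a terminating zero byte.
    chars.resize(input_rows_count * (out_template_size + 1));
    offsets.resize(input_rows_count);

    char * pos = chars.data();
    for (size_t i = 0; i < input_rows_count; ++i)
    {
        std::memcpy(pos, row_template.data(), out_template_size);
        pos += out_template_size;
        *pos++ = '\0';
        /// Offsets point one past the terminator of row i, as in ColumnString.
        offsets[i] = static_cast<size_t>(pos - chars.data());
    }
}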
ASSERT_EQ(memory.m_size, 0x8000000000000000ULL - PADDING_FOR_SIMD); -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL - (PADDING_FOR_SIMD - 1)), Exception, ErrorCodes::LOGICAL_ERROR); ASSERT_TRUE(memory.m_data); // state is intact after exception ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1); @@ -158,7 +158,7 @@ TEST(MemoryResizeTest, SmallInitAndBigResizeOverflowWhenPadding) ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL - (PADDING_FOR_SIMD - 1)), Exception, ErrorCodes::LOGICAL_ERROR); ASSERT_TRUE(memory.m_data); // state is intact after exception ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); @@ -197,7 +197,7 @@ TEST(MemoryResizeTest, BigInitAndSmallResizeOverflowWhenPadding) , ErrorCodes::ARGUMENT_OUT_OF_BOUND); } -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD { EXPECT_THROW_ERROR_CODE( { diff --git a/src/IO/tests/gtest_writebuffer_s3.cpp b/src/IO/tests/gtest_writebuffer_s3.cpp index 3c1af6538ad..b53a8b58023 100644 --- a/src/IO/tests/gtest_writebuffer_s3.cpp +++ b/src/IO/tests/gtest_writebuffer_s3.cpp @@ -917,8 +917,8 @@ TEST_P(SyncAsync, ExceptionOnUploadPart) { TEST_F(WBS3Test, PrefinalizeCalledMultipleTimes) { -#ifdef ABORT_ON_LOGICAL_ERROR - GTEST_SKIP() << "this test trigger LOGICAL_ERROR, runs only if ABORT_ON_LOGICAL_ERROR is not defined"; +#ifdef DEBUG_OR_SANITIZER_BUILD + GTEST_SKIP() << "this test triggers LOGICAL_ERROR; it runs only if DEBUG_OR_SANITIZER_BUILD is not defined"; #else EXPECT_THROW({ try { diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index a3848fa3a75..a88c0de2cfe 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -1007,7 +1007,7 @@ void FileCache::freeSpaceRatioKeepingThreadFunc() limits_satisfied = main_priority->collectCandidatesForEviction( desired_size, desired_elements_num, keep_up_free_space_remove_batch, stat, eviction_candidates, lock); -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD /// Let's make sure that we correctly processed the limits. 
if (limits_satisfied && eviction_candidates.size() < keep_up_free_space_remove_batch) { @@ -1110,7 +1110,7 @@ void FileCache::removeAllReleasable(const UserID & user_id) { assertInitialized(); -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD assertCacheCorrectness(); #endif @@ -1226,7 +1226,7 @@ void FileCache::loadMetadataImpl() if (first_exception) std::rethrow_exception(first_exception); -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD assertCacheCorrectness(); #endif } @@ -1393,7 +1393,7 @@ void FileCache::loadMetadataForKeys(const fs::path & keys_dir) FileCache::~FileCache() { deactivateBackgroundOperations(); -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD assertCacheCorrectness(); #endif } diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 838ca0b491e..1664a91b694 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -67,7 +67,7 @@ FileSegment::FileSegment( , key_metadata(key_metadata_) , queue_iterator(queue_iterator_) , cache(cache_) -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD , log(getLogger(fmt::format("FileSegment({}) : {}", key_.toString(), range().toString()))) #else , log(getLogger("FileSegment")) @@ -385,9 +385,9 @@ void FileSegment::write(char * from, size_t size, size_t offset_in_file) try { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD /// This mutex is only needed to have a valid assertion in assertCacheCorrectness(), - /// which is only executed in debug/sanitizer builds (under ABORT_ON_LOGICAL_ERROR). + /// which is only executed in debug/sanitizer builds (under DEBUG_OR_SANITIZER_BUILD). std::lock_guard lock(write_mutex); #endif diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 1d23278a255..7e4b76d3cc6 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -963,7 +963,7 @@ KeyMetadata::iterator LockedKey::removeFileSegmentImpl( } else if (!can_be_broken) { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected path {} to exist", path); #else LOG_WARNING(key_metadata->logger(), "Expected path {} to exist, while removing {}:{}", diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index ea10ad59db4..454cedae15c 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include @@ -82,13 +81,13 @@ #include #include -#include #include #include #include #include + namespace CurrentMetrics { extern const Metric AttachedTable; @@ -147,27 +146,27 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) } auto db_num_limit = getContext()->getGlobalContext()->getServerSettings().max_database_num_to_throw; - if (db_num_limit > 0) + if (db_num_limit > 0 && !internal) { size_t db_count = DatabaseCatalog::instance().getDatabases().size(); - std::vector system_databases = { + std::initializer_list system_databases = + { DatabaseCatalog::TEMPORARY_DATABASE, DatabaseCatalog::SYSTEM_DATABASE, DatabaseCatalog::INFORMATION_SCHEMA, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE, - DatabaseCatalog::DEFAULT_DATABASE }; for (const auto & system_database : system_databases) { - if (db_count > 0 && DatabaseCatalog::instance().isDatabaseExist(system_database)) - db_count--; + if (db_count > 0 && 
DatabaseCatalog::instance().isDatabaseExist(std::string(system_database))) + --db_count; } if (db_count >= db_num_limit) throw Exception(ErrorCodes::TOO_MANY_DATABASES, - "Too many databases in the Clickhouse. " - "The limit (setting 'max_database_num_to_throw') is set to {}, current number of databases is {}", + "Too many databases. " + "The limit (server configuration parameter `max_database_num_to_throw`) is set to {}, the current number of databases is {}", db_num_limit, db_count); } @@ -950,7 +949,7 @@ namespace throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated, Shared or KeeperMap table engines"); } - void setDefaultTableEngine(ASTStorage &storage, DefaultTableEngine engine) + void setDefaultTableEngine(ASTStorage & storage, DefaultTableEngine engine) { if (engine == DefaultTableEngine::None) throw Exception(ErrorCodes::ENGINE_REQUIRED, "Table engine is not specified in CREATE query"); @@ -970,9 +969,6 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (create.is_dictionary || create.is_ordinary_view || create.is_live_view || create.is_window_view) return; - if (create.is_materialized_view && create.to_table_id) - return; - if (create.temporary) { /// Some part of storage definition is specified, but ENGINE is not: just set the one from default_temporary_table_engine setting. @@ -987,22 +983,44 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const } if (!create.storage->engine) - { setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_temporary_table_engine.value); - } checkTemporaryTableEngineName(create.storage->engine->name); return; } + if (create.is_materialized_view) + { + /// A materialized view with an external target doesn't need a table engine. + if (create.is_materialized_view_with_external_target()) + return; + + if (auto to_engine = create.getTargetInnerEngine(ViewTarget::To)) + { + /// This materialized view already has a storage definition. + if (!to_engine->engine) + { + /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one. + setDefaultTableEngine(*to_engine, getContext()->getSettingsRef().default_table_engine.value); + } + return; + } + } + if (create.storage) { - /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one. + /// This table already has a storage definition. if (!create.storage->engine) + { + /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one. setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); + } return; } + /// We'll try to extract a storage definition from clause `AS`: + /// CREATE TABLE table_name AS other_table_name + std::shared_ptr storage_def; if (!create.as_table.empty()) { /// NOTE Getting the structure from the table specified in the AS is done not atomically with the creation of the table. @@ -1018,12 +1036,14 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (as_create.is_ordinary_view) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a View", qualified_name); - if (as_create.is_materialized_view && as_create.to_table_id) + if (as_create.is_materialized_view_with_external_target()) + { throw Exception( ErrorCodes::INCORRECT_QUERY, - "Cannot CREATE a table AS {}, it is a Materialized View without storage. 
Use \"AS `{}`\" instead", + "Cannot CREATE a table AS {}, it is a Materialized View without storage. Use \"AS {}\" instead", qualified_name, - as_create.to_table_id.getQualifiedName()); + as_create.getTargetTableID(ViewTarget::To).getFullTableName()); + } if (as_create.is_live_view) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Live View", qualified_name); @@ -1034,18 +1054,37 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (as_create.is_dictionary) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Dictionary", qualified_name); - if (as_create.storage) - create.set(create.storage, as_create.storage->ptr()); + if (as_create.is_materialized_view) + { + storage_def = as_create.getTargetInnerEngine(ViewTarget::To); + } else if (as_create.as_table_function) + { create.set(create.as_table_function, as_create.as_table_function->ptr()); + return; + } + else if (as_create.storage) + { + storage_def = typeid_cast>(as_create.storage->ptr()); + } else + { throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot set engine, it's a bug."); - - return; + } } - create.set(create.storage, std::make_shared()); - setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); + if (!storage_def) + { + /// Set ENGINE by default. + storage_def = std::make_shared(); + setDefaultTableEngine(*storage_def, getContext()->getSettingsRef().default_table_engine.value); + } + + /// Use the found table engine to modify the create query. + if (create.is_materialized_view) + create.setTargetInnerEngine(ViewTarget::To, storage_def); + else + create.set(create.storage, storage_def); } void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const DatabasePtr & database) const @@ -1087,11 +1126,11 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data kind_upper, create.table); } - create.generateRandomUUID(); + create.generateRandomUUIDs(); } else { - bool has_uuid = create.uuid != UUIDHelpers::Nil || create.to_inner_uuid != UUIDHelpers::Nil; + bool has_uuid = (create.uuid != UUIDHelpers::Nil) || create.hasInnerUUIDs(); if (has_uuid && !is_on_cluster && !internal) { /// We don't show the following error message either @@ -1106,8 +1145,7 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data /// The database doesn't support UUID so we'll ignore it. The UUID could be set here because of either /// a) the initiator of `ON CLUSTER` query generated it to ensure the same UUIDs are used on different hosts; or /// b) `RESTORE from backup` query generated it to ensure the same UUIDs are used on different hosts. 
- create.uuid = UUIDHelpers::Nil; - create.to_inner_uuid = UUIDHelpers::Nil; + create.resetUUIDs(); } } @@ -1131,6 +1169,14 @@ void checkTableCanBeAddedWithNoCyclicDependencies(const ASTCreateQuery & create, DatabaseCatalog::instance().checkTableCanBeAddedWithNoCyclicDependencies(qualified_name, ref_dependencies, loading_dependencies); } +bool isReplicated(const ASTStorage & storage) +{ + if (!storage.engine) + return false; + const auto & storage_name = storage.engine->name; + return storage_name.starts_with("Replicated") || storage_name.starts_with("Shared"); +} + } BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) @@ -1247,8 +1293,9 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (!create.temporary && !create.database) create.setDatabase(current_database); - if (create.to_table_id && create.to_table_id.database_name.empty()) - create.to_table_id.database_name = current_database; + + if (create.targets) + create.targets->setCurrentDatabase(current_database); if (create.select && create.isView()) { @@ -1282,12 +1329,9 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) TableProperties properties = getTablePropertiesAndNormalizeCreateQuery(create, mode); /// Check type compatible for materialized dest table and select columns - if (create.select && create.is_materialized_view && create.to_table_id && mode <= LoadingStrictnessLevel::CREATE) + if (create.is_materialized_view_with_external_target() && create.select && mode <= LoadingStrictnessLevel::CREATE) { - if (StoragePtr to_table = DatabaseCatalog::instance().tryGetTable( - {create.to_table_id.database_name, create.to_table_id.table_name, create.to_table_id.uuid}, - getContext() - )) + if (StoragePtr to_table = DatabaseCatalog::instance().tryGetTable(create.getTargetTableID(ViewTarget::To), getContext())) { Block input_block; @@ -1333,11 +1377,17 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (!allow_heavy_create && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) { bool is_storage_replicated = false; - if (create.storage && create.storage->engine) + + if (create.storage && isReplicated(*create.storage)) + is_storage_replicated = true; + + if (create.targets) { - const auto & storage_name = create.storage->engine->name; - if (storage_name.starts_with("Replicated") || storage_name.starts_with("Shared")) - is_storage_replicated = true; + for (const auto & inner_table_engine : create.targets->getInnerEngines()) + { + if (isReplicated(*inner_table_engine)) + is_storage_replicated = true; + } } const bool allow_create_select_for_replicated = (create.isView() && !create.is_populate) || create.is_create_empty || !is_storage_replicated; @@ -1601,13 +1651,13 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, } UInt64 table_num_limit = getContext()->getGlobalContext()->getServerSettings().max_table_num_to_throw; - if (table_num_limit > 0 && create.getDatabase() != DatabaseCatalog::SYSTEM_DATABASE) + if (table_num_limit > 0 && !internal) { UInt64 table_count = CurrentMetrics::get(CurrentMetrics::AttachedTable); if (table_count >= table_num_limit) throw Exception(ErrorCodes::TOO_MANY_TABLES, - "Too many tables in the Clickhouse. " - "The limit (setting 'max_table_num_to_throw') is set to {}, current number of tables is {}", + "Too many tables. 
" + "The limit (server configuration parameter `max_table_num_to_throw`) is set to {}, the current number of tables is {}", table_num_limit, table_count); } @@ -1796,7 +1846,7 @@ void InterpreterCreateQuery::prepareOnClusterQuery(ASTCreateQuery & create, Cont /// For CREATE query generate UUID on initiator, so it will be the same on all hosts. /// It will be ignored if database does not support UUIDs. - create.generateRandomUUID(); + create.generateRandomUUIDs(); /// For cross-replication cluster we cannot use UUID in replica path. String cluster_name_expanded = local_context->getMacros()->expand(cluster_name); @@ -1918,8 +1968,15 @@ AccessRightsElements InterpreterCreateQuery::getRequiredAccess() const } } - if (create.to_table_id) - required_access.emplace_back(AccessType::SELECT | AccessType::INSERT, create.to_table_id.database_name, create.to_table_id.table_name); + if (create.targets) + { + for (const auto & target : create.targets->targets) + { + const auto & target_id = target.table_id; + if (target_id) + required_access.emplace_back(AccessType::SELECT | AccessType::INSERT, target_id.database_name, target_id.table_name); + } + } if (create.storage && create.storage->engine) required_access.emplace_back(AccessType::TABLE_ENGINE, create.storage->engine->name); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index cd91f9532b9..7bee497f6da 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1726,7 +1726,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

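/// For context on the hunk below: the renamed flag gates an optimization that
/// builds an in-memory set from each join side and filters the opposite side
/// with it before joining. The gating condition, extracted into a sketch (the
/// flag names follow the diff; the plan-building code around it is omitted):
#include <cstdint>

bool shouldAddJoinSetFilter(
    std::uint64_t max_rows_in_set_to_optimize_join, /// 0 disables the optimization
    bool join_type_allows_filtering,                /// outer joins must keep unmatched rows
    bool has_non_const_keys)                        /// constant keys give a degenerate one-value set
{
    return max_rows_in_set_to_optimize_join > 0
        && join_type_allows_filtering
        && has_non_const_keys;
}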
getCurrentDataStream().header, join_clause.key_names_right); - if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering && has_non_const_keys) + if (settings.max_rows_in_set_to_optimize_join > 0 && join_type_allows_filtering && has_non_const_keys) { auto * left_set = add_create_set(query_plan, join_clause.key_names_left, JoinTableSide::Left); auto * right_set = add_create_set(*joined_plan, join_clause.key_names_right, JoinTableSide::Right); diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index 0fca7b64d5a..16add79d226 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -94,7 +94,8 @@ QueryPipeline InterpreterShowCreateQuery::executeImpl() { auto & create = create_query->as(); create.uuid = UUIDHelpers::Nil; - create.to_inner_uuid = UUIDHelpers::Nil; + if (create.targets) + create.targets->resetInnerUUIDs(); } MutableColumnPtr column = ColumnString::create(); diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 5d237d28089..1b57ad2b622 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -538,7 +538,7 @@ Chunk DDLQueryStatusSource::generate() ExecutionStatus status(-1, "Cannot obtain error message"); /// Replicated database retries in case of error, it should not write error status. -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD bool need_check_status = true; #else bool need_check_status = !is_replicated_database; diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index 6dc009da9a8..f0f782c0a63 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include #include #include @@ -240,12 +242,12 @@ ASTPtr ASTCreateQuery::clone() const res->set(res->columns_list, columns_list->clone()); if (storage) res->set(res->storage, storage->clone()); - if (inner_storage) - res->set(res->inner_storage, inner_storage->clone()); if (select) res->set(res->select, select->clone()); if (table_overrides) res->set(res->table_overrides, table_overrides->clone()); + if (targets) + res->set(res->targets, targets->clone()); if (dictionary) { @@ -398,20 +400,18 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat refresh_strategy->formatImpl(settings, state, frame); } - if (to_table_id) + if (auto to_table_id = getTargetTableID(ViewTarget::To)) { - assert((is_materialized_view || is_window_view) && to_inner_uuid == UUIDHelpers::Nil); - settings.ostr - << (settings.hilite ? hilite_keyword : "") << " TO " << (settings.hilite ? hilite_none : "") - << (!to_table_id.database_name.empty() ? backQuoteIfNeed(to_table_id.database_name) + "." : "") - << backQuoteIfNeed(to_table_id.table_name); + settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << toStringView(Keyword::TO) + << (settings.hilite ? hilite_none : "") << " " + << (!to_table_id.database_name.empty() ? backQuoteIfNeed(to_table_id.database_name) + "." : "") + << backQuoteIfNeed(to_table_id.table_name); } - if (to_inner_uuid != UUIDHelpers::Nil) + if (auto to_inner_uuid = getTargetInnerUUID(ViewTarget::To); to_inner_uuid != UUIDHelpers::Nil) { - assert(is_materialized_view && !to_table_id); - settings.ostr << (settings.hilite ? hilite_keyword : "") << " TO INNER UUID " << (settings.hilite ? 
hilite_none : "") - << quoteString(toString(to_inner_uuid)); + settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << toStringView(Keyword::TO_INNER_UUID) + << (settings.hilite ? hilite_none : "") << " " << quoteString(toString(to_inner_uuid)); } bool should_add_empty = is_create_empty; @@ -471,14 +471,17 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat frame.expression_list_always_start_on_new_line = false; - if (inner_storage) + if (storage) + storage->formatImpl(settings, state, frame); + + if (auto inner_storage = getTargetInnerEngine(ViewTarget::Inner)) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " INNER" << (settings.hilite ? hilite_none : ""); + settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << toStringView(Keyword::INNER) << (settings.hilite ? hilite_none : ""); inner_storage->formatImpl(settings, state, frame); } - if (storage) - storage->formatImpl(settings, state, frame); + if (auto to_storage = getTargetInnerEngine(ViewTarget::To)) + to_storage->formatImpl(settings, state, frame); if (dictionary) dictionary->formatImpl(settings, state, frame); @@ -538,48 +541,57 @@ bool ASTCreateQuery::isParameterizedView() const } -ASTCreateQuery::UUIDs::UUIDs(const ASTCreateQuery & query) - : uuid(query.uuid) - , to_inner_uuid(query.to_inner_uuid) +void ASTCreateQuery::generateRandomUUIDs() { + CreateQueryUUIDs{*this, /* generate_random= */ true}.copyToQuery(*this); } -String ASTCreateQuery::UUIDs::toString() const +void ASTCreateQuery::resetUUIDs() { - WriteBufferFromOwnString out; - out << "{" << uuid << "," << to_inner_uuid << "}"; - return out.str(); + CreateQueryUUIDs{}.copyToQuery(*this); } -ASTCreateQuery::UUIDs ASTCreateQuery::UUIDs::fromString(const String & str) + +StorageID ASTCreateQuery::getTargetTableID(ViewTarget::Kind target_kind) const { - ReadBufferFromString in{str}; - ASTCreateQuery::UUIDs res; - in >> "{" >> res.uuid >> "," >> res.to_inner_uuid >> "}"; - return res; + if (targets) + return targets->getTableID(target_kind); + return StorageID::createEmpty(); } -ASTCreateQuery::UUIDs ASTCreateQuery::generateRandomUUID(bool always_generate_new_uuid) +bool ASTCreateQuery::hasTargetTableID(ViewTarget::Kind target_kind) const { - if (always_generate_new_uuid) - setUUID({}); - - if (uuid == UUIDHelpers::Nil) - uuid = UUIDHelpers::generateV4(); - - /// If destination table (to_table_id) is not specified for materialized view, - /// then MV will create inner table. We should generate UUID of inner table here. 
- bool need_uuid_for_inner_table = !attach && is_materialized_view && !to_table_id; - if (need_uuid_for_inner_table && (to_inner_uuid == UUIDHelpers::Nil)) - to_inner_uuid = UUIDHelpers::generateV4(); - - return UUIDs{*this}; + if (targets) + return targets->hasTableID(target_kind); + return false; } -void ASTCreateQuery::setUUID(const UUIDs & uuids) +UUID ASTCreateQuery::getTargetInnerUUID(ViewTarget::Kind target_kind) const { - uuid = uuids.uuid; - to_inner_uuid = uuids.to_inner_uuid; + if (targets) + return targets->getInnerUUID(target_kind); + return UUIDHelpers::Nil; +} + +bool ASTCreateQuery::hasInnerUUIDs() const +{ + if (targets) + return targets->hasInnerUUIDs(); + return false; +} + +std::shared_ptr ASTCreateQuery::getTargetInnerEngine(ViewTarget::Kind target_kind) const +{ + if (targets) + return targets->getInnerEngine(target_kind); + return nullptr; +} + +void ASTCreateQuery::setTargetInnerEngine(ViewTarget::Kind target_kind, ASTPtr storage_def) +{ + if (!targets) + set(targets, std::make_shared()); + targets->setInnerEngine(target_kind, storage_def); } } diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index 9e4364b1f25..a95010aea31 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -15,6 +16,7 @@ namespace DB class ASTFunction; class ASTSetQuery; class ASTSelectWithUnionQuery; +struct CreateQueryUUIDs; class ASTStorage : public IAST @@ -101,17 +103,15 @@ public: bool has_uuid{false}; // CREATE TABLE x UUID '...' ASTColumns * columns_list = nullptr; - - StorageID to_table_id = StorageID::createEmpty(); /// For CREATE MATERIALIZED VIEW mv TO table. - UUID to_inner_uuid = UUIDHelpers::Nil; /// For materialized view with inner table - ASTStorage * inner_storage = nullptr; /// For window view with inner table ASTStorage * storage = nullptr; + ASTPtr watermark_function; ASTPtr lateness_function; String as_database; String as_table; IAST * as_table_function = nullptr; ASTSelectWithUnionQuery * select = nullptr; + ASTViewTargets * targets = nullptr; IAST * comment = nullptr; ASTPtr sql_security = nullptr; @@ -153,17 +153,26 @@ public: QueryKind getQueryKind() const override { return QueryKind::Create; } - struct UUIDs - { - UUID uuid = UUIDHelpers::Nil; - UUID to_inner_uuid = UUIDHelpers::Nil; - UUIDs() = default; - explicit UUIDs(const ASTCreateQuery & query); - String toString() const; - static UUIDs fromString(const String & str); - }; - UUIDs generateRandomUUID(bool always_generate_new_uuid = false); - void setUUID(const UUIDs & uuids); + /// Generates a random UUID for this create query if it's not specified already. + /// The function also generates random UUIDs for inner target tables if this create query implies that + /// (for example, if it's a `CREATE MATERIALIZED VIEW` query with an inner storage). + void generateRandomUUIDs(); + + /// Removes the UUID from this create query. + /// The function also removes UUIDs for inner target tables from this create query (see also generateRandomUUIDs()). + void resetUUIDs(); + + /// Returns information about a target table. + /// If that information isn't specified in this create query (or isn't even allowed) then the function returns an empty value. 
+ StorageID getTargetTableID(ViewTarget::Kind target_kind) const; + bool hasTargetTableID(ViewTarget::Kind target_kind) const; + UUID getTargetInnerUUID(ViewTarget::Kind target_kind) const; + bool hasInnerUUIDs() const; + std::shared_ptr getTargetInnerEngine(ViewTarget::Kind target_kind) const; + void setTargetInnerEngine(ViewTarget::Kind target_kind, ASTPtr storage_def); + + bool is_materialized_view_with_external_target() const { return is_materialized_view && hasTargetTableID(ViewTarget::To); } + bool is_materialized_view_with_inner_table() const { return is_materialized_view && !hasTargetTableID(ViewTarget::To); } protected: void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; @@ -171,8 +180,8 @@ protected: void forEachPointerToChild(std::function f) override { f(reinterpret_cast(&columns_list)); - f(reinterpret_cast(&inner_storage)); f(reinterpret_cast(&storage)); + f(reinterpret_cast(&targets)); f(reinterpret_cast(&as_table_function)); f(reinterpret_cast(&select)); f(reinterpret_cast(&comment)); diff --git a/src/Parsers/ASTViewTargets.cpp b/src/Parsers/ASTViewTargets.cpp new file mode 100644 index 00000000000..8ee98e704df --- /dev/null +++ b/src/Parsers/ASTViewTargets.cpp @@ -0,0 +1,300 @@ +#include + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +} + + +std::string_view toString(ViewTarget::Kind kind) +{ + switch (kind) + { + case ViewTarget::To: return "to"; + case ViewTarget::Inner: return "inner"; + } + throw Exception(ErrorCodes::LOGICAL_ERROR, "{} doesn't support kind {}", __FUNCTION__, kind); +} + +void parseFromString(ViewTarget::Kind & out, std::string_view str) +{ + for (auto kind : magic_enum::enum_values()) + { + if (toString(kind) == str) + { + out = kind; + return; + } + } + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}: Unexpected string {}", __FUNCTION__, str); +} + + +std::vector ASTViewTargets::getKinds() const +{ + std::vector kinds; + kinds.reserve(targets.size()); + for (const auto & target : targets) + kinds.push_back(target.kind); + return kinds; +} + + +void ASTViewTargets::setTableID(ViewTarget::Kind kind, const StorageID & table_id_) +{ + for (auto & target : targets) + { + if (target.kind == kind) + { + target.table_id = table_id_; + return; + } + } + if (table_id_) + targets.emplace_back(kind).table_id = table_id_; +} + +StorageID ASTViewTargets::getTableID(ViewTarget::Kind kind) const +{ + if (const auto * target = tryGetTarget(kind)) + return target->table_id; + return StorageID::createEmpty(); +} + +bool ASTViewTargets::hasTableID(ViewTarget::Kind kind) const +{ + if (const auto * target = tryGetTarget(kind)) + return !target->table_id.empty(); + return false; +} + +void ASTViewTargets::setCurrentDatabase(const String & current_database) +{ + for (auto & target : targets) + { + auto & table_id = target.table_id; + if (!table_id.table_name.empty() && table_id.database_name.empty()) + table_id.database_name = current_database; + } +} + +void ASTViewTargets::setInnerUUID(ViewTarget::Kind kind, const UUID & inner_uuid_) +{ + for (auto & target : targets) + { + if (target.kind == kind) + { + target.inner_uuid = inner_uuid_; + return; + } + } + if (inner_uuid_ != UUIDHelpers::Nil) + targets.emplace_back(kind).inner_uuid = inner_uuid_; +} + +UUID ASTViewTargets::getInnerUUID(ViewTarget::Kind kind) const +{ + if (const auto * target = tryGetTarget(kind)) + return target->inner_uuid; + return 
UUIDHelpers::Nil; +} + +bool ASTViewTargets::hasInnerUUID(ViewTarget::Kind kind) const +{ + return getInnerUUID(kind) != UUIDHelpers::Nil; +} + +void ASTViewTargets::resetInnerUUIDs() +{ + for (auto & target : targets) + target.inner_uuid = UUIDHelpers::Nil; +} + +bool ASTViewTargets::hasInnerUUIDs() const +{ + for (const auto & target : targets) + { + if (target.inner_uuid != UUIDHelpers::Nil) + return true; + } + return false; +} + +void ASTViewTargets::setInnerEngine(ViewTarget::Kind kind, ASTPtr storage_def) +{ + auto new_inner_engine = typeid_cast>(storage_def); + if (!new_inner_engine && storage_def) + throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Bad cast from type {} to ASTStorage", storage_def->getID()); + + for (auto & target : targets) + { + if (target.kind == kind) + { + if (target.inner_engine == new_inner_engine) + return; + if (new_inner_engine) + children.push_back(new_inner_engine); + if (target.inner_engine) + std::erase(children, target.inner_engine); + target.inner_engine = new_inner_engine; + return; + } + } + + if (new_inner_engine) + { + targets.emplace_back(kind).inner_engine = new_inner_engine; + children.push_back(new_inner_engine); + } +} + +std::shared_ptr ASTViewTargets::getInnerEngine(ViewTarget::Kind kind) const +{ + if (const auto * target = tryGetTarget(kind)) + return target->inner_engine; + return nullptr; +} + +std::vector> ASTViewTargets::getInnerEngines() const +{ + std::vector> res; + res.reserve(targets.size()); + for (const auto & target : targets) + { + if (target.inner_engine) + res.push_back(target.inner_engine); + } + return res; +} + +const ViewTarget * ASTViewTargets::tryGetTarget(ViewTarget::Kind kind) const +{ + for (const auto & target : targets) + { + if (target.kind == kind) + return &target; + } + return nullptr; +} + +ASTPtr ASTViewTargets::clone() const +{ + auto res = std::make_shared(*this); + res->children.clear(); + for (auto & target : res->targets) + { + if (target.inner_engine) + { + target.inner_engine = typeid_cast>(target.inner_engine->clone()); + res->children.push_back(target.inner_engine); + } + } + return res; +} + +void ASTViewTargets::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const +{ + for (const auto & target : targets) + formatTarget(target, s, state, frame); +} + +void ASTViewTargets::formatTarget(ViewTarget::Kind kind, const FormatSettings & s, FormatState & state, FormatStateStacked frame) const +{ + for (const auto & target : targets) + { + if (target.kind == kind) + formatTarget(target, s, state, frame); + } +} + +void ASTViewTargets::formatTarget(const ViewTarget & target, const FormatSettings & s, FormatState & state, FormatStateStacked frame) +{ + if (target.table_id) + { + auto keyword = getKeywordForTableID(target.kind); + if (!keyword) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No keyword for table name of kind {}", toString(target.kind)); + s.ostr << " " << (s.hilite ? hilite_keyword : "") << toStringView(*keyword) + << (s.hilite ? hilite_none : "") << " " + << (!target.table_id.database_name.empty() ? backQuoteIfNeed(target.table_id.database_name) + "." : "") + << backQuoteIfNeed(target.table_id.table_name); + } + + if (target.inner_uuid != UUIDHelpers::Nil) + { + auto keyword = getKeywordForInnerUUID(target.kind); + if (!keyword) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No prefix keyword for inner UUID of kind {}", toString(target.kind)); + s.ostr << " " << (s.hilite ? hilite_keyword : "") << toStringView(*keyword) + << (s.hilite ? 
hilite_none : "") << " " << quoteString(toString(target.inner_uuid)); + } + + if (target.inner_engine) + { + auto keyword = getKeywordForInnerStorage(target.kind); + if (!keyword) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No prefix keyword for table engine of kind {}", toString(target.kind)); + s.ostr << " " << (s.hilite ? hilite_keyword : "") << toStringView(*keyword) << (s.hilite ? hilite_none : ""); + target.inner_engine->formatImpl(s, state, frame); + } +} + +std::optional ASTViewTargets::getKeywordForTableID(ViewTarget::Kind kind) +{ + switch (kind) + { + case ViewTarget::To: return Keyword::TO; /// TO mydb.mydata + case ViewTarget::Inner: return std::nullopt; + } + UNREACHABLE(); +} + +std::optional ASTViewTargets::getKeywordForInnerStorage(ViewTarget::Kind kind) +{ + switch (kind) + { + case ViewTarget::To: return std::nullopt; /// ENGINE = MergeTree() + case ViewTarget::Inner: return Keyword::INNER; /// INNER ENGINE = MergeTree() + } + UNREACHABLE(); +} + +std::optional ASTViewTargets::getKeywordForInnerUUID(ViewTarget::Kind kind) +{ + switch (kind) + { + case ViewTarget::To: return Keyword::TO_INNER_UUID; /// TO INNER UUID 'XXX' + case ViewTarget::Inner: return std::nullopt; + } + UNREACHABLE(); +} + +void ASTViewTargets::forEachPointerToChild(std::function f) +{ + for (auto & target : targets) + { + if (target.inner_engine) + { + ASTStorage * new_inner_engine = target.inner_engine.get(); + f(reinterpret_cast(&new_inner_engine)); + if (new_inner_engine != target.inner_engine.get()) + { + if (new_inner_engine) + target.inner_engine = typeid_cast>(new_inner_engine->ptr()); + else + target.inner_engine.reset(); + } + } + } +} + +} diff --git a/src/Parsers/ASTViewTargets.h b/src/Parsers/ASTViewTargets.h new file mode 100644 index 00000000000..12182919f0e --- /dev/null +++ b/src/Parsers/ASTViewTargets.h @@ -0,0 +1,115 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class ASTStorage; +enum class Keyword : size_t; + +/// Information about target tables (external or inner) of a materialized view or a window view. +/// See ASTViewTargets for more details. +struct ViewTarget +{ + enum Kind + { + /// If `kind == ViewTarget::To` then `ViewTarget` contains information about the "TO" table of a materialized view or a window view: + /// CREATE MATERIALIZED VIEW db.mv_name {TO [db.]to_target | ENGINE to_engine} AS SELECT ... + /// or + /// CREATE WINDOW VIEW db.wv_name {TO [db.]to_target | ENGINE to_engine} AS SELECT ... + To, + + /// If `kind == ViewTarget::Inner` then `ViewTarget` contains information about the "INNER" table of a window view: + /// CREATE WINDOW VIEW db.wv_name {INNER ENGINE inner_engine} AS SELECT ... + Inner, + }; + + Kind kind = To; + + /// StorageID of the target table, if it's not inner. + /// That storage ID can be seen for example after "TO" in a statement like CREATE MATERIALIZED VIEW ... TO ... + StorageID table_id = StorageID::createEmpty(); + + /// UUID of the target table, if it's inner. + /// The UUID is calculated automatically and can be seen for example after "TO INNER UUID" in a statement like + /// CREATE MATERIALIZED VIEW ... TO INNER UUID ... + UUID inner_uuid = UUIDHelpers::Nil; + + /// Table engine of the target table, if it's inner. + /// That engine can be seen for example after "ENGINE" in a statement like CREATE MATERIALIZED VIEW ... ENGINE ... + std::shared_ptr inner_engine; +}; + +/// Converts ViewTarget::Kind to a string. 
+/// Converts ViewTarget::Kind to a string.
+std::string_view toString(ViewTarget::Kind kind);
+void parseFromString(ViewTarget::Kind & out, std::string_view str);
+
+
+/// Information about all target tables (external or inner) of a view.
+///
+/// For example, for a materialized view:
+///     CREATE MATERIALIZED VIEW db.mv_name [TO [db.]to_target | ENGINE to_engine] AS SELECT ...
+/// this class contains information about the "TO" table: its name and database (if it's external), its UUID and engine (if it's inner).
+///
+/// For a window view:
+///     CREATE WINDOW VIEW db.wv_name [TO [db.]to_target | ENGINE to_engine] [INNER ENGINE inner_engine] AS SELECT ...
+/// this class contains information about both the "TO" table and the "INNER" table.
+class ASTViewTargets : public IAST
+{
+public:
+    std::vector<ViewTarget> targets;
+
+    /// Sets the StorageID of the target table, if it's not inner.
+    /// That storage ID can be seen for example after "TO" in a statement like CREATE MATERIALIZED VIEW ... TO ...
+    void setTableID(ViewTarget::Kind kind, const StorageID & table_id_);
+    StorageID getTableID(ViewTarget::Kind kind) const;
+    bool hasTableID(ViewTarget::Kind kind) const;
+
+    /// Replaces an empty database in the StorageID of the target table with a specified database.
+    void setCurrentDatabase(const String & current_database);
+
+    /// Sets the UUID of the target table, if it's inner.
+    /// The UUID is calculated automatically and can be seen for example after "TO INNER UUID" in a statement like
+    /// CREATE MATERIALIZED VIEW ... TO INNER UUID ...
+    void setInnerUUID(ViewTarget::Kind kind, const UUID & inner_uuid_);
+    UUID getInnerUUID(ViewTarget::Kind kind) const;
+    bool hasInnerUUID(ViewTarget::Kind kind) const;
+
+    void resetInnerUUIDs();
+    bool hasInnerUUIDs() const;
+
+    /// Sets the table engine of the target table, if it's inner.
+    /// That engine can be seen for example after "ENGINE" in a statement like CREATE MATERIALIZED VIEW ... ENGINE ...
+    void setInnerEngine(ViewTarget::Kind kind, ASTPtr storage_def);
+    std::shared_ptr<ASTStorage> getInnerEngine(ViewTarget::Kind kind) const;
+    std::vector<std::shared_ptr<ASTStorage>> getInnerEngines() const;
+
+    /// Returns a list of all kinds of views in this ASTViewTargets.
+    std::vector<ViewTarget::Kind> getKinds() const;
+
+    /// Returns information about a target table.
+    /// The function returns null if such a target doesn't exist.
+    const ViewTarget * tryGetTarget(ViewTarget::Kind kind) const;
+
+    String getID(char) const override { return "ViewTargets"; }
+
+    ASTPtr clone() const override;
+
+    void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override;
+
+    /// Formats information only about a specific target table.
+    void formatTarget(ViewTarget::Kind kind, const FormatSettings & s, FormatState & state, FormatStateStacked frame) const;
+    static void formatTarget(const ViewTarget & target, const FormatSettings & s, FormatState & state, FormatStateStacked frame);
+
+    /// Helper functions for class ParserViewTargets. Returns a prefix keyword matching a specified target kind.
+    static std::optional<Keyword> getKeywordForTableID(ViewTarget::Kind kind);
+    static std::optional<Keyword> getKeywordForInnerUUID(ViewTarget::Kind kind);
+    static std::optional<Keyword> getKeywordForInnerStorage(ViewTarget::Kind kind);
+
+protected:
+    void forEachPointerToChild(std::function<void(void**)> f) override;
+};
+
+}
diff --git a/src/Parsers/CreateQueryUUIDs.cpp b/src/Parsers/CreateQueryUUIDs.cpp
new file mode 100644
index 00000000000..4dfee67b537
--- /dev/null
+++ b/src/Parsers/CreateQueryUUIDs.cpp
@@ -0,0 +1,168 @@
+#include <Parsers/CreateQueryUUIDs.h>
+
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+CreateQueryUUIDs::CreateQueryUUIDs(const ASTCreateQuery & query, bool generate_random, bool force_random)
+{
+    if (!generate_random || !force_random)
+    {
+        uuid = query.uuid;
+        if (query.targets)
+        {
+            for (const auto & target : query.targets->targets)
+                setTargetInnerUUID(target.kind, target.inner_uuid);
+        }
+    }
+
+    if (generate_random)
+    {
+        if (uuid == UUIDHelpers::Nil)
+            uuid = UUIDHelpers::generateV4();
+
+        /// For an ATTACH query we should never generate UUIDs for its inner target tables
+        /// because for an ATTACH query those inner target tables probably already exist and are accessible by their names.
+        /// If we generate random UUIDs for already existing tables then those UUIDs will not be correct, making those inner target tables inaccessible.
+        /// Thus it's not safe, for example, to replace
+        /// "ATTACH MATERIALIZED VIEW mv AS SELECT a FROM b" with
+        /// "ATTACH MATERIALIZED VIEW mv TO INNER UUID "XXXX" AS SELECT a FROM b"
+        /// This replacement is safe only for CREATE queries when inner target tables don't exist yet.
+        if (!query.attach)
+        {
+            auto generate_target_uuid = [&](ViewTarget::Kind target_kind)
+            {
+                if ((query.getTargetInnerUUID(target_kind) == UUIDHelpers::Nil) && query.getTargetTableID(target_kind).empty())
+                    setTargetInnerUUID(target_kind, UUIDHelpers::generateV4());
+            };
+
+            /// If the destination table (to_table_id) is not specified for a materialized view,
+            /// the MV will create an inner table. We should generate the UUID of the inner table here.
+            if (query.is_materialized_view)
+                generate_target_uuid(ViewTarget::To);
+        }
+    }
+}
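/// A hedged sketch of the intended call pattern (the function name and call site
/// are illustrative, not part of this patch): during CREATE we may generate fresh
/// UUIDs, while ATTACH must keep whatever the query already carries.
///
///     void assignUUIDs(ASTCreateQuery & query)
///     {
///         CreateQueryUUIDs uuids(query, /* generate_random= */ true);
///         uuids.copyToQuery(query); /// query now carries UUID / TO INNER UUID clauses
///     }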
+
+bool CreateQueryUUIDs::empty() const
+{
+    if (uuid != UUIDHelpers::Nil)
+        return false;
+    for (const auto & [_, inner_uuid] : targets_inner_uuids)
+    {
+        if (inner_uuid != UUIDHelpers::Nil)
+            return false;
+    }
+    return true;
+}
+
+String CreateQueryUUIDs::toString() const
+{
+    WriteBufferFromOwnString out;
+    out << "{";
+    bool need_comma = false;
+    auto add_name_and_uuid_to_string = [&](std::string_view name_, const UUID & uuid_)
+    {
+        if (std::exchange(need_comma, true))
+            out << ", ";
+        out << "\"" << name_ << "\": \"" << uuid_ << "\"";
+    };
+    if (uuid != UUIDHelpers::Nil)
+        add_name_and_uuid_to_string("uuid", uuid);
+    for (const auto & [kind, inner_uuid] : targets_inner_uuids)
+    {
+        if (inner_uuid != UUIDHelpers::Nil)
+            add_name_and_uuid_to_string(::DB::toString(kind), inner_uuid);
+    }
+    out << "}";
+    return out.str();
+}
+
+CreateQueryUUIDs CreateQueryUUIDs::fromString(const String & str)
+{
+    ReadBufferFromString in{str};
+    CreateQueryUUIDs res;
+    skipWhitespaceIfAny(in);
+    in >> "{";
+    skipWhitespaceIfAny(in);
+    char c;
+    while (in.peek(c) && c != '}')
+    {
+        String name;
+        String value;
+        readDoubleQuotedString(name, in);
+        skipWhitespaceIfAny(in);
+        in >> ":";
+        skipWhitespaceIfAny(in);
+        readDoubleQuotedString(value, in);
+        skipWhitespaceIfAny(in);
+        if (name == "uuid")
+        {
+            res.uuid = parse<UUID>(value);
+        }
+        else
+        {
+            ViewTarget::Kind kind;
+            parseFromString(kind, name);
+            res.setTargetInnerUUID(kind, parse<UUID>(value));
+        }
+        if (in.peek(c) && c == ',')
+        {
+            in.ignore(1);
+            skipWhitespaceIfAny(in);
+        }
+    }
+    in >> "}";
+    return res;
+}
+
+void CreateQueryUUIDs::setTargetInnerUUID(ViewTarget::Kind kind, const UUID & new_inner_uuid)
+{
+    for (auto & pair : targets_inner_uuids)
+    {
+        if (pair.first == kind)
+        {
+            pair.second = new_inner_uuid;
+            return;
+        }
+    }
+    if (new_inner_uuid != UUIDHelpers::Nil)
+        targets_inner_uuids.emplace_back(kind, new_inner_uuid);
+}
+
+UUID CreateQueryUUIDs::getTargetInnerUUID(ViewTarget::Kind kind) const
+{
+    for (const auto & pair : targets_inner_uuids)
+    {
+        if (pair.first == kind)
+            return pair.second;
+    }
+    return UUIDHelpers::Nil;
+}
+
+void CreateQueryUUIDs::copyToQuery(ASTCreateQuery & query) const
+{
+    query.uuid = uuid;
+
+    if (query.targets)
+        query.targets->resetInnerUUIDs();
+
+    if (!targets_inner_uuids.empty())
+    {
+        if (!query.targets)
+            query.set(query.targets, std::make_shared<ASTViewTargets>());
+
+        for (const auto & [kind, inner_uuid] : targets_inner_uuids)
+        {
+            if (inner_uuid != UUIDHelpers::Nil)
+                query.targets->setInnerUUID(kind, inner_uuid);
+        }
+    }
+}
+
+}
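Taken together, toString() and fromString() above give CreateQueryUUIDs a small JSON-like serialization, e.g. {"uuid": "...", "to": "..."} (the "to" key is an assumption: it is whatever toString(ViewTarget::Kind) yields for ViewTarget::To). A minimal round-trip sketch:

    CreateQueryUUIDs uuids;
    uuids.uuid = UUIDHelpers::generateV4();
    uuids.setTargetInnerUUID(ViewTarget::To, UUIDHelpers::generateV4());
    CreateQueryUUIDs restored = CreateQueryUUIDs::fromString(uuids.toString());
    chassert(restored.uuid == uuids.uuid);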
diff --git a/src/Parsers/CreateQueryUUIDs.h b/src/Parsers/CreateQueryUUIDs.h
new file mode 100644
index 00000000000..419dad24b35
--- /dev/null
+++ b/src/Parsers/CreateQueryUUIDs.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <Parsers/ASTViewTargets.h>
+
+
+namespace DB
+{
+class ASTCreateQuery;
+
+/// The UUID of a table or a database defined with a CREATE QUERY, along with the UUIDs of its inner targets.
+struct CreateQueryUUIDs
+{
+    CreateQueryUUIDs() = default;
+
+    /// Collect UUIDs from an ASTCreateQuery.
+    /// Parameters:
+    /// `generate_random` - if true, UUIDs not specified in the query will be generated randomly;
+    /// `force_random` - if true, all UUIDs (even those specified in the query) will be (re)generated randomly.
+    explicit CreateQueryUUIDs(const ASTCreateQuery & query, bool generate_random = false, bool force_random = false);
+
+    bool empty() const;
+    explicit operator bool() const { return !empty(); }
+
+    String toString() const;
+    static CreateQueryUUIDs fromString(const String & str);
+
+    void setTargetInnerUUID(ViewTarget::Kind kind, const UUID & new_inner_uuid);
+    UUID getTargetInnerUUID(ViewTarget::Kind kind) const;
+
+    /// Copies UUIDs to an ASTCreateQuery.
+    void copyToQuery(ASTCreateQuery & query) const;
+
+    /// UUID of the table.
+    UUID uuid = UUIDHelpers::Nil;
+
+    /// UUIDs of its inner target table (or tables).
+    std::vector<std::pair<ViewTarget::Kind, UUID>> targets_inner_uuids;
+};
+
+}
diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp
index fff8383e7b3..f97c042e91e 100644
--- a/src/Parsers/ExpressionListParsers.cpp
+++ b/src/Parsers/ExpressionListParsers.cpp
@@ -2743,7 +2743,7 @@ Action ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & po
     /// 'AND' can be both boolean function and part of the '... BETWEEN ... AND ...' operator
     if (op.function_name == "and" && layers.back()->between_counter)
     {
-        layers.back()->between_counter--;
+        --layers.back()->between_counter;
         op = finish_between_operator;
     }
diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp
index 014dc7bd3bf..41379a845e7 100644
--- a/src/Parsers/ParserCreateQuery.cpp
+++ b/src/Parsers/ParserCreateQuery.cpp
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -693,7 +694,8 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
     ASTPtr table;
     ASTPtr columns_list;
-    ASTPtr storage;
+    std::shared_ptr<ASTStorage> storage;
+    ASTPtr targets;
     ASTPtr as_database;
     ASTPtr as_table;
     ASTPtr as_table_function;
@@ -773,6 +775,17 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
         return true;
     }
+    auto parse_storage = [&]
+    {
+        chassert(!storage);
+        ASTPtr ast;
+        if (!storage_p.parse(pos, ast, expected))
+            return false;
+
+        storage = typeid_cast<std::shared_ptr<ASTStorage>>(ast);
+        return true;
+    };
+
     auto need_parse_as_select = [&is_create_empty, &pos, &expected]()
     {
         if (ParserKeyword{Keyword::EMPTY_AS}.ignore(pos, expected))
@@ -798,7 +811,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
         if (!s_rparen.ignore(pos, expected))
             return false;
-        auto storage_parse_result = storage_p.parse(pos, storage, expected);
+        auto storage_parse_result = parse_storage();
         if ((storage_parse_result || is_temporary) && need_parse_as_select())
         {
@@ -820,7 +833,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
       */
     else
     {
-        storage_p.parse(pos, storage, expected);
+        parse_storage();
         /// CREATE|ATTACH TABLE ... AS ...
         if (need_parse_as_select())
@@ -843,7 +856,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
                 /// Optional - ENGINE can be specified.
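/// Note (an inference from how `storage` is used in this patch, not stated in it):
/// `parse_storage` materializes the engine clause as std::shared_ptr<ASTStorage>
/// rather than a plain ASTPtr so the view parsers below can still modify it
/// (e.g. attach a primary key) before handing it to ASTViewTargets::setInnerEngine().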
                if (!storage)
-                    storage_p.parse(pos, storage, expected);
+                    parse_storage();
            }
        }
    }
@@ -904,6 +917,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
    tryGetIdentifierNameInto(as_database, query->as_database);
    tryGetIdentifierNameInto(as_table, query->as_table);
    query->set(query->select, select);
+    query->set(query->targets, targets);
    query->is_create_empty = is_create_empty;
    if (from_path)
@@ -977,6 +991,13 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e
            return false;
    }
+    std::shared_ptr<ASTViewTargets> targets;
+    if (to_table)
+    {
+        targets = std::make_shared<ASTViewTargets>();
+        targets->setTableID(ViewTarget::To, to_table->as<ASTTableIdentifier>()->getTableId());
+    }
+
    /// Optional - a list of columns can be specified. It must fully comply with SELECT.
    if (s_lparen.ignore(pos, expected))
    {
@@ -1017,14 +1038,12 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e
    if (query->table)
        query->children.push_back(query->table);
-    if (to_table)
-        query->to_table_id = to_table->as<ASTTableIdentifier>()->getTableId();
-
    query->set(query->columns_list, columns_list);
    tryGetIdentifierNameInto(as_database, query->as_database);
    tryGetIdentifierNameInto(as_table, query->as_table);
    query->set(query->select, select);
+    query->set(query->targets, targets);
    if (comment)
        query->set(query->comment, comment);
@@ -1139,6 +1158,18 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
        storage_p.parse(pos, storage, expected);
    }
+    std::shared_ptr<ASTViewTargets> targets;
+    if (to_table || storage || inner_storage)
+    {
+        targets = std::make_shared<ASTViewTargets>();
+        if (to_table)
+            targets->setTableID(ViewTarget::To, to_table->as<ASTTableIdentifier>()->getTableId());
+        if (storage)
+            targets->setInnerEngine(ViewTarget::To, storage);
+        if (inner_storage)
+            targets->setInnerEngine(ViewTarget::Inner, inner_storage);
+    }
+
    // WATERMARK
    if (s_watermark.ignore(pos, expected))
    {
@@ -1195,12 +1226,8 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
    if (query->table)
        query->children.push_back(query->table);
-    if (to_table)
-        query->to_table_id = to_table->as<ASTTableIdentifier>()->getTableId();
-
    query->set(query->columns_list, columns_list);
-    query->set(query->storage, storage);
-    query->set(query->inner_storage, inner_storage);
+
    query->is_watermark_strictly_ascending = is_watermark_strictly_ascending;
    query->is_watermark_ascending = is_watermark_ascending;
    query->is_watermark_bounded = is_watermark_bounded;
@@ -1213,6 +1240,7 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
    tryGetIdentifierNameInto(as_database, query->as_database);
    tryGetIdentifierNameInto(as_table, query->as_table);
    query->set(query->select, select);
+    query->set(query->targets, targets);
    return true;
}
@@ -1436,6 +1464,7 @@ bool ParserCreateDatabaseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e
    return true;
}
+
bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    ParserKeyword s_create(Keyword::CREATE);
@@ -1622,13 +1651,8 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
    if (query->table)
        query->children.push_back(query->table);
-    if (to_table)
-        query->to_table_id = to_table->as<ASTTableIdentifier>()->getTableId();
-    if (to_inner_uuid)
-        query->to_inner_uuid = parseFromString<UUID>(to_inner_uuid->as<ASTLiteral>()->value.get<String>());
-
    query->set(query->columns_list, columns_list);
-    query->set(query->storage, storage);
+
    if (refresh_strategy)
        query->set(query->refresh_strategy, refresh_strategy);
    if (comment)
@@ -1639,29 +1663,41 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
    if (query->columns_list && query->columns_list->primary_key)
    {
        /// If engine is not set will use default one
-        if (!query->storage)
-            query->set(query->storage, std::make_shared<ASTStorage>());
-        else if (query->storage->primary_key)
+        if (!storage)
+            storage = std::make_shared<ASTStorage>();
+        auto & storage_ref = typeid_cast<ASTStorage &>(*storage);
+        if (storage_ref.primary_key)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed.");
-
-        query->storage->primary_key = query->columns_list->primary_key;
-
+        storage_ref.primary_key = query->columns_list->primary_key;
    }
    if (query->columns_list && (query->columns_list->primary_key_from_columns))
    {
        /// If engine is not set will use default one
-        if (!query->storage)
-            query->set(query->storage, std::make_shared<ASTStorage>());
-        else if (query->storage->primary_key)
+        if (!storage)
+            storage = std::make_shared<ASTStorage>();
+        auto & storage_ref = typeid_cast<ASTStorage &>(*storage);
+        if (storage_ref.primary_key)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed.");
+        storage_ref.primary_key = query->columns_list->primary_key_from_columns;
+    }
-
-        query->storage->primary_key = query->columns_list->primary_key_from_columns;
+    std::shared_ptr<ASTViewTargets> targets;
+    if (to_table || to_inner_uuid || storage)
+    {
+        targets = std::make_shared<ASTViewTargets>();
+        if (to_table)
+            targets->setTableID(ViewTarget::To, to_table->as<ASTTableIdentifier>()->getTableId());
+        if (to_inner_uuid)
+            targets->setInnerUUID(ViewTarget::To, parseFromString<UUID>(to_inner_uuid->as<ASTLiteral>()->value.safeGet<String>()));
+        if (storage)
+            targets->setInnerEngine(ViewTarget::To, storage);
    }
    tryGetIdentifierNameInto(as_database, query->as_database);
    tryGetIdentifierNameInto(as_table, query->as_table);
    query->set(query->select, select);
+    query->set(query->targets, targets);
    return true;
}
diff --git a/src/Parsers/ParserViewTargets.cpp b/src/Parsers/ParserViewTargets.cpp
new file mode 100644
index 00000000000..8f010882cdd
--- /dev/null
+++ b/src/Parsers/ParserViewTargets.cpp
@@ -0,0 +1,88 @@
+#include <Parsers/ParserViewTargets.h>
+
+#include
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+ParserViewTargets::ParserViewTargets()
+{
+    for (auto kind : magic_enum::enum_values<ViewTarget::Kind>())
+        accept_kinds.push_back(kind);
+}
+
+bool ParserViewTargets::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    ParserStringLiteral literal_p;
+    ParserStorage storage_p{ParserStorage::TABLE_ENGINE};
+    ParserCompoundIdentifier table_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ true);
+
+    std::shared_ptr<ASTViewTargets> res;
+
+    auto result = [&]() -> ASTViewTargets &
+    {
+        if (!res)
+            res = std::make_shared<ASTViewTargets>();
+        return *res;
+    };
+
+    for (;;)
+    {
+        auto start = pos;
+        for (auto kind : accept_kinds)
+        {
+            auto current = pos;
+
+            auto keyword = ASTViewTargets::getKeywordForInnerUUID(kind);
+            if (keyword && ParserKeyword{*keyword}.ignore(pos, expected))
+            {
+                ASTPtr ast;
+                if (literal_p.parse(pos, ast, expected))
+                {
+                    result().setInnerUUID(kind, parseFromString<UUID>(ast->as<ASTLiteral>()->value.safeGet<String>()));
+                    break;
+                }
+            }
+            pos = current;
+
+            keyword = ASTViewTargets::getKeywordForInnerStorage(kind);
+            if (keyword && ParserKeyword{*keyword}.ignore(pos, expected))
+            {
+                ASTPtr ast;
+                if (storage_p.parse(pos, ast, expected))
+                {
+                    result().setInnerEngine(kind, ast);
+                    break;
+                }
+            }
+            pos = current;
+
+            keyword = ASTViewTargets::getKeywordForTableID(kind);
+            if (keyword && ParserKeyword{*keyword}.ignore(pos, expected))
+            {
+                ASTPtr ast;
+                if (table_name_p.parse(pos, ast, expected))
+                {
+                    result().setTableID(kind, ast->as<ASTTableIdentifier>()->getTableId());
+                    break;
+                }
+            }
+            pos = current;
+        }
+        if (pos == start)
+            break;
+    }
+
+    if (!res || res->targets.empty())
+        return false;
+
+    node = res;
+    return true;
+}
+
+}
diff --git a/src/Parsers/ParserViewTargets.h b/src/Parsers/ParserViewTargets.h
new file mode 100644
index 00000000000..3af3c0b8df3
--- /dev/null
+++ b/src/Parsers/ParserViewTargets.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <Parsers/ASTViewTargets.h>
+#include <Parsers/IParserBase.h>
+
+
+namespace DB
+{
+
+/// Parses information about target tables (external or inner) of a materialized view or a window view.
+/// The function parses one or multiple parts of a CREATE query looking like this:
+///     TO db.table_name
+///     TO INNER UUID 'XXX'
+///     {ENGINE / INNER ENGINE} TableEngine(arguments) [ORDER BY ...] [SETTINGS ...]
+/// Returns ASTViewTargets if succeeded.
+class ParserViewTargets : public IParserBase
+{
+public:
+    ParserViewTargets();
+    explicit ParserViewTargets(const std::vector<ViewTarget::Kind> & accept_kinds_) : accept_kinds(accept_kinds_) { }
+
+protected:
+    const char * getName() const override { return "ViewTargets"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+
+    std::vector<ViewTarget::Kind> accept_kinds;
+};
+
+}
diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp
index ab599331a9d..4fedacbb48a 100644
--- a/src/Planner/PlannerJoinTree.cpp
+++ b/src/Planner/PlannerJoinTree.cpp
@@ -77,7 +77,6 @@ namespace ErrorCodes
    extern const int INVALID_JOIN_ON_EXPRESSION;
    extern const int LOGICAL_ERROR;
    extern const int NOT_IMPLEMENTED;
-    extern const int SYNTAX_ERROR;
    extern const int ACCESS_DENIED;
    extern const int PARAMETER_OUT_OF_BOUND;
    extern const int TOO_MANY_COLUMNS;
@@ -1397,12 +1396,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
        {
            if (!join_clause.hasASOF())
                throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
-                    "JOIN {} no inequality in ASOF JOIN ON section.",
-                    join_node.formatASTForErrorMessage());
-
-            if (table_join_clause.key_names_left.size() <= 1)
-                throw Exception(ErrorCodes::SYNTAX_ERROR,
-                    "JOIN {} ASOF join needs at least one equi-join column",
+                    "JOIN {} no inequality in ASOF JOIN ON section",
                    join_node.formatASTForErrorMessage());
        }
@@ -1524,7 +1518,9 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
    {
        const auto & join_clause = table_join->getOnlyClause();
-        bool kind_allows_filtering = isInner(join_kind) || isLeft(join_kind) || isRight(join_kind);
+        bool join_type_allows_filtering = (join_strictness == JoinStrictness::All || join_strictness == JoinStrictness::Any)
+            && (isInner(join_kind) || isLeft(join_kind) || isRight(join_kind));
+
        auto has_non_const = [](const Block & block, const auto & keys)
        {
@@ -1544,7 +1540,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
        bool has_non_const_keys = has_non_const(left_plan.getCurrentDataStream().header, join_clause.key_names_left)
            && has_non_const(right_plan.getCurrentDataStream().header, join_clause.key_names_right);
-        if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering && has_non_const_keys)
+        if (settings.max_rows_in_set_to_optimize_join > 0 && join_type_allows_filtering && has_non_const_keys)
        {
            auto * left_set = add_create_set(left_plan, join_clause.key_names_left, JoinTableSide::Left);
            auto * right_set = add_create_set(right_plan, join_clause.key_names_right, JoinTableSide::Right);
diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp
index fb3b2faa9c5..e96a75d277b
100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -34,13 +34,20 @@ namespace ErrorCodes namespace { -FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns) +FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns, JoinStrictness strictness) { SortDescription desc; desc.reserve(columns.size()); for (const auto & name : columns) desc.emplace_back(name); - return std::make_unique(block, desc); + return std::make_unique(block, desc, strictness == JoinStrictness::Asof); +} + +bool ALWAYS_INLINE isNullAt(const IColumn & column, size_t row) +{ + if (const auto * nullable_column = checkAndGetColumn(&column)) + return nullable_column->isNullAt(row); + return false; } template @@ -54,7 +61,7 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column, if (left_nullable && right_nullable) { int res = left_nullable->compareAt(lhs_pos, rhs_pos, right_column, null_direction_hint); - if (res) + if (res != 0) return res; /// NULL != NULL case @@ -90,9 +97,10 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column, int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos, const SortCursorImpl & rhs, size_t rpos, + size_t key_length, int null_direction_hint) { - for (size_t i = 0; i < lhs.sort_columns_size; ++i) + for (size_t i = 0; i < key_length; ++i) { /// TODO(@vdimir): use nullableCompareAt only if there's nullable columns int cmp = nullableCompareAt(*lhs.sort_columns[i], *rhs.sort_columns[i], lpos, rpos, null_direction_hint); @@ -104,13 +112,18 @@ int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos, int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, const SortCursorImpl & rhs, int null_direction_hint) { - return compareCursors(lhs, lhs.getRow(), rhs, rhs.getRow(), null_direction_hint); + return compareCursors(lhs, lhs.getRow(), rhs, rhs.getRow(), lhs.sort_columns_size, null_direction_hint); +} + +int compareAsofCursors(const FullMergeJoinCursor & lhs, const FullMergeJoinCursor & rhs, int null_direction_hint) +{ + return nullableCompareAt(*lhs.getAsofColumn(), *rhs.getAsofColumn(), lhs->getRow(), rhs->getRow(), null_direction_hint); } bool ALWAYS_INLINE totallyLess(SortCursorImpl & lhs, SortCursorImpl & rhs, int null_direction_hint) { /// The last row of left cursor is less than the current row of the right cursor. 
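/// Aside, a sketch of the comparison contract introduced above (values are
/// hypothetical): for ASOF joins the sort description is "equality columns +
/// asof column", so callers that must ignore the asof column pass an explicit
/// key_length. With keys (k, t) and key_length = 1, rows (1, 10) and (1, 20)
/// compare equal here, while their t values are ordered separately via
/// compareAsofCursors().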
- int cmp = compareCursors(lhs, lhs.rows - 1, rhs, rhs.getRow(), null_direction_hint); + int cmp = compareCursors(lhs, lhs.rows - 1, rhs, rhs.getRow(), lhs.sort_columns_size, null_direction_hint); return cmp < 0; } @@ -222,25 +235,136 @@ Chunk getRowFromChunk(const Chunk & chunk, size_t pos) return result; } -void inline addRange(PaddedPODArray & left_map, size_t start, size_t end) +void inline addRange(PaddedPODArray & values, UInt64 start, UInt64 end) { assert(end > start); - for (size_t i = start; i < end; ++i) - left_map.push_back(i); + for (UInt64 i = start; i < end; ++i) + values.push_back(i); } -void inline addMany(PaddedPODArray & left_or_right_map, size_t idx, size_t num) +void inline addMany(PaddedPODArray & values, UInt64 value, size_t num) { - for (size_t i = 0; i < num; ++i) - left_or_right_map.push_back(idx); + values.resize_fill(values.size() + num, value); } } -FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_) - : sample_block(materializeBlock(sample_block_).cloneEmpty()), desc(description_) +JoinKeyRow::JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos) { + row.reserve(cursor->sort_columns.size()); + for (const auto & col : cursor->sort_columns) + { + auto new_col = col->cloneEmpty(); + new_col->insertFrom(*col, pos); + row.push_back(std::move(new_col)); + } + if (const IColumn * asof_column = cursor.getAsofColumn()) + { + if (const auto * nullable_asof_column = checkAndGetColumn(asof_column)) + { + /// We save matched column, and since NULL do not match anything, we can't use it as a key + chassert(!nullable_asof_column->isNullAt(pos)); + asof_column = nullable_asof_column->getNestedColumnPtr().get(); + } + auto new_col = asof_column->cloneEmpty(); + new_col->insertFrom(*asof_column, pos); + row.push_back(std::move(new_col)); + } } +void JoinKeyRow::reset() +{ + row.clear(); +} + +bool JoinKeyRow::equals(const FullMergeJoinCursor & cursor) const +{ + if (row.empty()) + return false; + + for (size_t i = 0; i < cursor->sort_columns_size; ++i) + { + // int cmp = this->row[i]->compareAt(0, cursor->getRow(), *(cursor->sort_columns[i]), cursor->desc[i].nulls_direction); + int cmp = nullableCompareAt(*this->row[i], *cursor->sort_columns[i], 0, cursor->getRow(), cursor->desc[i].nulls_direction); + if (cmp != 0) + return false; + } + return true; +} + +bool JoinKeyRow::asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const +{ + chassert(this->row.size() == cursor->sort_columns_size + 1); + if (!equals(cursor)) + return false; + + const auto & asof_row = row.back(); + if (isNullAt(*asof_row, 0) || isNullAt(*cursor.getAsofColumn(), cursor->getRow())) + return false; + + int cmp = 0; + if (const auto * nullable_column = checkAndGetColumn(cursor.getAsofColumn())) + cmp = nullable_column->getNestedColumn().compareAt(cursor->getRow(), 0, *asof_row, 1); + else + cmp = cursor.getAsofColumn()->compareAt(cursor->getRow(), 0, *asof_row, 1); + + return (asof_inequality == ASOFJoinInequality::Less && cmp < 0) + || (asof_inequality == ASOFJoinInequality::LessOrEquals && cmp <= 0) + || (asof_inequality == ASOFJoinInequality::Greater && cmp > 0) + || (asof_inequality == ASOFJoinInequality::GreaterOrEquals && cmp >= 0); +} + +void AnyJoinState::set(size_t source_num, const FullMergeJoinCursor & cursor) +{ + assert(cursor->rows); + keys[source_num] = JoinKeyRow(cursor, cursor->rows - 1); +} + +void AnyJoinState::reset(size_t source_num) +{ + keys[source_num].reset(); + value.clear(); +} + +void 
AnyJoinState::setValue(Chunk value_) +{ + value = std::move(value_); +} + +bool AnyJoinState::empty() const { return keys[0].row.empty() && keys[1].row.empty(); } + + +void AsofJoinState::set(const FullMergeJoinCursor & rcursor, size_t rpos) +{ + key = JoinKeyRow(rcursor, rpos); + value = rcursor.getCurrent().clone(); + value_row = rpos; +} + +void AsofJoinState::reset() +{ + key.reset(); + value.clear(); +} + +FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_, bool is_asof) + : sample_block(materializeBlock(sample_block_).cloneEmpty()) + , desc(description_) +{ + if (desc.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty sort description for FullMergeJoinCursor"); + + if (is_asof) + { + /// For ASOF join prefix of sort description is used for equality comparison + /// and the last column is used for inequality comparison and is handled separately + + auto asof_column_description = desc.back(); + desc.pop_back(); + + chassert(asof_column_description.direction == 1 && asof_column_description.nulls_direction == 1); + asof_column_position = sample_block.getPositionByName(asof_column_description.column_name); + } +} const Chunk & FullMergeJoinCursor::getCurrent() const { @@ -278,48 +402,103 @@ bool FullMergeJoinCursor::fullyCompleted() const return !cursor.isValid() && recieved_all_blocks; } +String FullMergeJoinCursor::dump() const +{ + Strings row_dump; + if (cursor.isValid()) + { + Field val; + for (size_t i = 0; i < cursor.sort_columns_size; ++i) + { + cursor.sort_columns[i]->get(cursor.getRow(), val); + row_dump.push_back(val.dump()); + } + + if (const auto * asof_column = getAsofColumn()) + { + asof_column->get(cursor.getRow(), val); + row_dump.push_back(val.dump()); + } + } + + return fmt::format("<{}/{}{}>[{}]", + cursor.getRow(), cursor.rows, + recieved_all_blocks ? 
"(finished)" : "", + fmt::join(row_dump, ", ")); +} + MergeJoinAlgorithm::MergeJoinAlgorithm( - JoinPtr table_join_, + JoinKind kind_, + JoinStrictness strictness_, + const TableJoin::JoinOnClause & on_clause_, const Blocks & input_headers, size_t max_block_size_) - : table_join(table_join_) + : kind(kind_) + , strictness(strictness_) , max_block_size(max_block_size_) , log(getLogger("MergeJoinAlgorithm")) { if (input_headers.size() != 2) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeJoinAlgorithm requires exactly two inputs"); - auto strictness = table_join->getTableJoin().strictness(); - if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All) + if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All && strictness != JoinStrictness::Asof) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for strictness {}", strictness); - auto kind = table_join->getTableJoin().kind(); + if (strictness == JoinStrictness::Asof) + { + if (kind != JoinKind::Left && kind != JoinKind::Inner) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not implement ASOF {} join", kind); + } + if (!isInner(kind) && !isLeft(kind) && !isRight(kind) && !isFull(kind)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for kind {}", kind); - const auto & join_on = table_join->getTableJoin().getOnlyClause(); - - if (join_on.on_filter_condition_left || join_on.on_filter_condition_right) + if (on_clause_.on_filter_condition_left || on_clause_.on_filter_condition_right) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not support ON filter conditions"); cursors = { - createCursor(input_headers[0], join_on.key_names_left), - createCursor(input_headers[1], join_on.key_names_right) + createCursor(input_headers[0], on_clause_.key_names_left, strictness), + createCursor(input_headers[1], on_clause_.key_names_right, strictness), }; +} - for (const auto & [left_key, right_key] : table_join->getTableJoin().leftToRightKeyRemap()) +MergeJoinAlgorithm::MergeJoinAlgorithm( + JoinPtr join_ptr, + const Blocks & input_headers, + size_t max_block_size_) + : MergeJoinAlgorithm( + join_ptr->getTableJoin().kind(), + join_ptr->getTableJoin().strictness(), + join_ptr->getTableJoin().getOnlyClause(), + input_headers, + max_block_size_) +{ + for (const auto & [left_key, right_key] : join_ptr->getTableJoin().leftToRightKeyRemap()) { size_t left_idx = input_headers[0].getPositionByName(left_key); size_t right_idx = input_headers[1].getPositionByName(right_key); left_to_right_key_remap[left_idx] = right_idx; } - const auto *smjPtr = typeid_cast(table_join.get()); + const auto *smjPtr = typeid_cast(join_ptr.get()); if (smjPtr) { null_direction_hint = smjPtr->getNullDirection(); } + if (strictness == JoinStrictness::Asof) + setAsofInequality(join_ptr->getTableJoin().getAsofInequality()); +} + +void MergeJoinAlgorithm::setAsofInequality(ASOFJoinInequality asof_inequality_) +{ + if (strictness != JoinStrictness::Asof) + throw Exception(ErrorCodes::LOGICAL_ERROR, "setAsofInequality is only supported for ASOF joins"); + + if (asof_inequality_ == ASOFJoinInequality::None) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ASOF inequality cannot be None"); + + asof_inequality = asof_inequality_; } void MergeJoinAlgorithm::logElapsed(double seconds) @@ -407,7 +586,7 @@ struct AllJoinImpl size_t lnum = nextDistinct(left_cursor.cursor); size_t rnum = nextDistinct(right_cursor.cursor); - bool all_fit_in_block = 
std::max(left_map.size(), right_map.size()) + lnum * rnum <= max_block_size; + bool all_fit_in_block = !max_block_size || std::max(left_map.size(), right_map.size()) + lnum * rnum <= max_block_size; bool have_all_ranges = left_cursor.cursor.isValid() && right_cursor.cursor.isValid(); if (all_fit_in_block && have_all_ranges) { @@ -421,7 +600,7 @@ struct AllJoinImpl else { assert(state == nullptr); - state = std::make_unique(left_cursor.cursor, lpos, right_cursor.cursor, rpos); + state = std::make_unique(left_cursor, lpos, right_cursor, rpos); state->addRange(0, left_cursor.getCurrent().clone(), lpos, lnum); state->addRange(1, right_cursor.getCurrent().clone(), rpos, rnum); return; @@ -466,6 +645,17 @@ void dispatchKind(JoinKind kind, Args && ... args) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported join kind: \"{}\"", kind); } +MutableColumns MergeJoinAlgorithm::getEmptyResultColumns() const +{ + MutableColumns result_cols; + for (size_t i = 0; i < 2; ++i) + { + for (const auto & col : cursors[i]->sampleColumns()) + result_cols.push_back(col->cloneEmpty()); + } + return result_cols; +} + std::optional MergeJoinAlgorithm::handleAllJoinState() { if (all_join_state && all_join_state->finished()) @@ -479,7 +669,7 @@ std::optional MergeJoinAlgorithm::handleAllJoinState /// Accumulate blocks with same key in all_join_state for (size_t i = 0; i < 2; ++i) { - if (cursors[i]->cursor.isValid() && all_join_state->keys[i].equals(cursors[i]->cursor)) + if (cursors[i]->cursor.isValid() && all_join_state->keys[i].equals(*cursors[i])) { size_t pos = cursors[i]->cursor.getRow(); size_t num = nextDistinct(cursors[i]->cursor); @@ -499,15 +689,10 @@ std::optional MergeJoinAlgorithm::handleAllJoinState stat.max_blocks_loaded = std::max(stat.max_blocks_loaded, all_join_state->blocksStored()); /// join all rows with current key - MutableColumns result_cols; - for (size_t i = 0; i < 2; ++i) - { - for (const auto & col : cursors[i]->sampleColumns()) - result_cols.push_back(col->cloneEmpty()); - } + MutableColumns result_cols = getEmptyResultColumns(); size_t total_rows = 0; - while (total_rows < max_block_size) + while (!max_block_size || total_rows < max_block_size) { const auto & left_range = all_join_state->getLeft(); const auto & right_range = all_join_state->getRight(); @@ -532,7 +717,52 @@ std::optional MergeJoinAlgorithm::handleAllJoinState return {}; } -MergeJoinAlgorithm::Status MergeJoinAlgorithm::allJoin(JoinKind kind) +std::optional MergeJoinAlgorithm::handleAsofJoinState() +{ + if (strictness != JoinStrictness::Asof) + return {}; + + if (!cursors[1]->fullyCompleted()) + return {}; + + auto & left_cursor = *cursors[0]; + const auto & left_columns = left_cursor.getCurrent().getColumns(); + + MutableColumns result_cols = getEmptyResultColumns(); + + while (left_cursor->isValid() && asof_join_state.hasMatch(left_cursor, asof_inequality)) + { + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, left_cursor->getRow()); + for (const auto & col : asof_join_state.value.getColumns()) + result_cols[i++]->insertFrom(*col, asof_join_state.value_row); + chassert(i == result_cols.size()); + left_cursor->next(); + } + + while (isLeft(kind) && left_cursor->isValid()) + { + /// return row with default values at right side + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, left_cursor->getRow()); + for (; i < result_cols.size(); ++i) + result_cols[i]->insertDefault(); + chassert(i == result_cols.size()); + + left_cursor->next(); + 
} + + size_t result_rows = result_cols.empty() ? 0 : result_cols.front()->size(); + if (result_rows) + return Status(Chunk(std::move(result_cols), result_rows)); + + return {}; +} + + +MergeJoinAlgorithm::Status MergeJoinAlgorithm::allJoin() { PaddedPODArray idx_map[2]; @@ -595,7 +825,7 @@ struct AnyJoinImpl FullMergeJoinCursor & right_cursor, PaddedPODArray & left_map, PaddedPODArray & right_map, - AnyJoinState & state, + AnyJoinState & any_join_state, int null_direction_hint) { assert(enabled); @@ -656,21 +886,21 @@ struct AnyJoinImpl } } - /// Remember index of last joined row to propagate it to next block + /// Remember last joined row to propagate it to next block - state.setValue({}); + any_join_state.setValue({}); if (!left_cursor->isValid()) { - state.set(0, left_cursor.cursor); + any_join_state.set(0, left_cursor); if (cmp == 0 && isLeft(kind)) - state.setValue(getRowFromChunk(right_cursor.getCurrent(), rpos)); + any_join_state.setValue(getRowFromChunk(right_cursor.getCurrent(), rpos)); } if (!right_cursor->isValid()) { - state.set(1, right_cursor.cursor); + any_join_state.set(1, right_cursor); if (cmp == 0 && isRight(kind)) - state.setValue(getRowFromChunk(left_cursor.getCurrent(), lpos)); + any_join_state.setValue(getRowFromChunk(left_cursor.getCurrent(), lpos)); } } }; @@ -680,40 +910,34 @@ std::optional MergeJoinAlgorithm::handleAnyJoinState if (any_join_state.empty()) return {}; - auto kind = table_join->getTableJoin().kind(); - Chunk result; for (size_t source_num = 0; source_num < 2; ++source_num) { auto & current = *cursors[source_num]; - auto & state = any_join_state; - if (any_join_state.keys[source_num].equals(current.cursor)) + if (any_join_state.keys[source_num].equals(current)) { size_t start_pos = current->getRow(); size_t length = nextDistinct(current.cursor); if (length && isLeft(kind) && source_num == 0) { - if (state.value) - result = copyChunkResized(current.getCurrent(), state.value, start_pos, length); + if (any_join_state.value) + result = copyChunkResized(current.getCurrent(), any_join_state.value, start_pos, length); else result = createBlockWithDefaults(source_num, start_pos, length); } if (length && isRight(kind) && source_num == 1) { - if (state.value) - result = copyChunkResized(state.value, current.getCurrent(), start_pos, length); + if (any_join_state.value) + result = copyChunkResized(any_join_state.value, current.getCurrent(), start_pos, length); else result = createBlockWithDefaults(source_num, start_pos, length); } - /// We've found row with other key, no need to skip more rows with current key if (current->isValid()) - { - state.keys[source_num].reset(); - } + any_join_state.keys[source_num].reset(); } else { @@ -726,7 +950,7 @@ std::optional MergeJoinAlgorithm::handleAnyJoinState return {}; } -MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin(JoinKind kind) +MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin() { if (auto result = handleAnyJoinState()) return std::move(*result); @@ -771,10 +995,151 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin(JoinKind kind) return Status(std::move(result)); } + +MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin() +{ + auto & left_cursor = *cursors[0]; + if (!left_cursor->isValid()) + return Status(0); + + auto & right_cursor = *cursors[1]; + if (!right_cursor->isValid()) + return Status(1); + + const auto & left_columns = left_cursor.getCurrent().getColumns(); + const auto & right_columns = right_cursor.getCurrent().getColumns(); + + MutableColumns result_cols = 
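/// Result layout in asofJoin() and the other join paths: all left-side columns
/// first, then all right-side columns, matching the order produced by
/// getEmptyResultColumns().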
getEmptyResultColumns(); + + while (left_cursor->isValid() && right_cursor->isValid()) + { + auto lpos = left_cursor->getRow(); + auto rpos = right_cursor->getRow(); + auto cmp = compareCursors(*left_cursor, *right_cursor, null_direction_hint); + if (cmp == 0) + { + if (isNullAt(*left_cursor.getAsofColumn(), lpos)) + cmp = -1; + if (isNullAt(*right_cursor.getAsofColumn(), rpos)) + cmp = 1; + } + + if (cmp == 0) + { + auto asof_cmp = compareAsofCursors(left_cursor, right_cursor, null_direction_hint); + + if ((asof_inequality == ASOFJoinInequality::Less && asof_cmp <= -1) + || (asof_inequality == ASOFJoinInequality::LessOrEquals && asof_cmp <= 0)) + { + /// First row in right table that is greater (or equal) than current row in left table + /// matches asof join condition the best + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, lpos); + for (const auto & col : right_columns) + result_cols[i++]->insertFrom(*col, rpos); + chassert(i == result_cols.size()); + + left_cursor->next(); + continue; + } + + if (asof_inequality == ASOFJoinInequality::Less || asof_inequality == ASOFJoinInequality::LessOrEquals) + { + /// Asof condition is not (yet) satisfied, skip row in right table + right_cursor->next(); + continue; + } + + if ((asof_inequality == ASOFJoinInequality::Greater && asof_cmp >= 1) + || (asof_inequality == ASOFJoinInequality::GreaterOrEquals && asof_cmp >= 0)) + { + /// condition is satisfied, remember this row and move next to try to find better match + asof_join_state.set(right_cursor, rpos); + right_cursor->next(); + continue; + } + + if (asof_inequality == ASOFJoinInequality::Greater || asof_inequality == ASOFJoinInequality::GreaterOrEquals) + { + /// Asof condition is not satisfied anymore, use last matched row from right table + if (asof_join_state.hasMatch(left_cursor, asof_inequality)) + { + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, lpos); + for (const auto & col : asof_join_state.value.getColumns()) + result_cols[i++]->insertFrom(*col, asof_join_state.value_row); + chassert(i == result_cols.size()); + } + else + { + asof_join_state.reset(); + if (isLeft(kind)) + { + /// return row with default values at right side + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, lpos); + for (; i < result_cols.size(); ++i) + result_cols[i]->insertDefault(); + chassert(i == result_cols.size()); + } + } + left_cursor->next(); + continue; + } + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "TODO: implement ASOF equality join"); + } + else if (cmp < 0) + { + if (asof_join_state.hasMatch(left_cursor, asof_inequality)) + { + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, lpos); + for (const auto & col : asof_join_state.value.getColumns()) + result_cols[i++]->insertFrom(*col, asof_join_state.value_row); + chassert(i == result_cols.size()); + left_cursor->next(); + continue; + } + else + { + asof_join_state.reset(); + } + + /// no matches for rows in left table, just pass them through + size_t num = nextDistinct(*left_cursor); + + if (isLeft(kind) && num) + { + /// return them with default values at right side + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertRangeFrom(*col, lpos, num); + for (; i < result_cols.size(); ++i) + result_cols[i]->insertManyDefaults(num); + chassert(i == result_cols.size()); + } + } + else + { + /// skip rows in right table until we find match for current row in left 
table + nextDistinct(*right_cursor); + } + } + size_t num_rows = result_cols.empty() ? 0 : result_cols.front()->size(); + return Status(Chunk(std::move(result_cols), num_rows)); +} + + /// if `source_num == 0` get data from left cursor and fill defaults at right /// otherwise - vice versa Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const { + ColumnRawPtrs cols; { const auto & columns_left = source_num == 0 ? cursors[0]->getCurrent().getColumns() : cursors[0]->sampleColumns(); @@ -797,7 +1162,6 @@ Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t star cols.push_back(col.get()); } } - Chunk result_chunk; copyColumnsResized(cols, start, num_rows, result_chunk); return result_chunk; @@ -813,7 +1177,6 @@ Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num) IMergingAlgorithm::Status MergeJoinAlgorithm::merge() { - auto kind = table_join->getTableJoin().kind(); if (!cursors[0]->cursor.isValid() && !cursors[0]->fullyCompleted()) return Status(0); @@ -821,11 +1184,11 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge() if (!cursors[1]->cursor.isValid() && !cursors[1]->fullyCompleted()) return Status(1); - if (auto result = handleAllJoinState()) - { return std::move(*result); - } + + if (auto result = handleAsofJoinState()) + return std::move(*result); if (cursors[0]->fullyCompleted() || cursors[1]->fullyCompleted()) { @@ -839,7 +1202,7 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge() } /// check if blocks are not intersecting at all - if (int cmp = totallyCompare(cursors[0]->cursor, cursors[1]->cursor, null_direction_hint); cmp != 0) + if (int cmp = totallyCompare(cursors[0]->cursor, cursors[1]->cursor, null_direction_hint); cmp != 0 && strictness != JoinStrictness::Asof) { if (cmp < 0) { @@ -858,13 +1221,14 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge() } } - auto strictness = table_join->getTableJoin().strictness(); - if (strictness == JoinStrictness::Any) - return anyJoin(kind); + return anyJoin(); if (strictness == JoinStrictness::All) - return allJoin(kind); + return allJoin(); + + if (strictness == JoinStrictness::Asof) + return asofJoin(); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported strictness '{}'", strictness); } @@ -883,9 +1247,26 @@ MergeJoinTransform::MergeJoinTransform( /* always_read_till_end_= */ false, /* empty_chunk_on_finish_= */ true, table_join, input_headers, max_block_size) - , log(getLogger("MergeJoinTransform")) { - LOG_TRACE(log, "Use MergeJoinTransform"); +} + +MergeJoinTransform::MergeJoinTransform( + JoinKind kind_, + JoinStrictness strictness_, + const TableJoin::JoinOnClause & on_clause_, + const Blocks & input_headers, + const Block & output_header, + size_t max_block_size, + UInt64 limit_hint_) + : IMergingTransform( + input_headers, + output_header, + /* have_all_inputs_= */ true, + limit_hint_, + /* always_read_till_end_= */ false, + /* empty_chunk_on_finish_= */ true, + kind_, strictness_, on_clause_, input_headers, max_block_size) +{ } void MergeJoinTransform::onFinish() diff --git a/src/Processors/Transforms/MergeJoinTransform.h b/src/Processors/Transforms/MergeJoinTransform.h index 5ca6b076544..d37a0b9f3ae 100644 --- a/src/Processors/Transforms/MergeJoinTransform.h +++ b/src/Processors/Transforms/MergeJoinTransform.h @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -19,6 +20,7 @@ #include #include #include +#include namespace Poco { class Logger; } @@ -35,57 +37,28 @@ using FullMergeJoinCursorPtr = 
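/// Context for the reworked JoinKeyRow below: for ASOF joins it stores one extra
/// column beyond the equality keys, namely a single saved value of the asof
/// column with the Nullable wrapper stripped (NULLs never match), so it can be
/// compared with plain compareAt().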
std::unique_ptr; /// Used instead of storing previous block struct JoinKeyRow { - std::vector row; - JoinKeyRow() = default; - explicit JoinKeyRow(const SortCursorImpl & impl_, size_t pos) - { - row.reserve(impl_.sort_columns.size()); - for (const auto & col : impl_.sort_columns) - { - auto new_col = col->cloneEmpty(); - new_col->insertFrom(*col, pos); - row.push_back(std::move(new_col)); - } - } + JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos); - void reset() - { - row.clear(); - } + bool equals(const FullMergeJoinCursor & cursor) const; + bool asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const; - bool equals(const SortCursorImpl & impl) const - { - if (row.empty()) - return false; + void reset(); - assert(this->row.size() == impl.sort_columns_size); - for (size_t i = 0; i < impl.sort_columns_size; ++i) - { - int cmp = this->row[i]->compareAt(0, impl.getRow(), *impl.sort_columns[i], impl.desc[i].nulls_direction); - if (cmp != 0) - return false; - } - return true; - } + std::vector row; }; /// Remembers previous key if it was joined in previous block class AnyJoinState : boost::noncopyable { public: - AnyJoinState() = default; + void set(size_t source_num, const FullMergeJoinCursor & cursor); + void setValue(Chunk value_); - void set(size_t source_num, const SortCursorImpl & cursor) - { - assert(cursor.rows); - keys[source_num] = JoinKeyRow(cursor, cursor.rows - 1); - } + void reset(size_t source_num); - void setValue(Chunk value_) { value = std::move(value_); } - - bool empty() const { return keys[0].row.empty() && keys[1].row.empty(); } + bool empty() const; /// current keys JoinKeyRow keys[2]; @@ -118,8 +91,8 @@ public: Chunk chunk; }; - AllJoinState(const SortCursorImpl & lcursor, size_t lpos, - const SortCursorImpl & rcursor, size_t rpos) + AllJoinState(const FullMergeJoinCursor & lcursor, size_t lpos, + const FullMergeJoinCursor & rcursor, size_t rpos) : keys{JoinKeyRow(lcursor, lpos), JoinKeyRow(rcursor, rpos)} { } @@ -187,13 +160,32 @@ private: size_t ridx = 0; }; + +class AsofJoinState : boost::noncopyable +{ +public: + void set(const FullMergeJoinCursor & rcursor, size_t rpos); + void reset(); + + bool hasMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const + { + if (value.empty()) + return false; + return key.asofMatch(cursor, asof_inequality); + } + + JoinKeyRow key; + Chunk value; + size_t value_row = 0; +}; + /* * Wrapper for SortCursorImpl */ class FullMergeJoinCursor : boost::noncopyable { public: - explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_); + explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_, bool is_asof = false); bool fullyCompleted() const; void setChunk(Chunk && chunk); @@ -203,17 +195,31 @@ public: SortCursorImpl * operator-> () { return &cursor; } const SortCursorImpl * operator-> () const { return &cursor; } + SortCursorImpl & operator* () { return cursor; } + const SortCursorImpl & operator* () const { return cursor; } + SortCursorImpl cursor; const Block & sampleBlock() const { return sample_block; } Columns sampleColumns() const { return sample_block.getColumns(); } + const IColumn * getAsofColumn() const + { + if (!asof_column_position) + return nullptr; + return cursor.all_columns[*asof_column_position]; + } + + String dump() const; + private: Block sample_block; SortDescription desc; Chunk current_chunk; bool recieved_all_blocks = false; + + std::optional asof_column_position; }; /* 
@@ -223,22 +229,33 @@ private: class MergeJoinAlgorithm final : public IMergingAlgorithm { public: - explicit MergeJoinAlgorithm(JoinPtr table_join, const Blocks & input_headers, size_t max_block_size_); + MergeJoinAlgorithm(JoinKind kind_, + JoinStrictness strictness_, + const TableJoin::JoinOnClause & on_clause_, + const Blocks & input_headers, + size_t max_block_size_); + + MergeJoinAlgorithm(JoinPtr join_ptr, const Blocks & input_headers, size_t max_block_size_); const char * getName() const override { return "MergeJoinAlgorithm"; } void initialize(Inputs inputs) override; void consume(Input & input, size_t source_num) override; Status merge() override; - void logElapsed(double seconds); + void setAsofInequality(ASOFJoinInequality asof_inequality_); + void logElapsed(double seconds); private: std::optional handleAnyJoinState(); - Status anyJoin(JoinKind kind); + Status anyJoin(); std::optional handleAllJoinState(); - Status allJoin(JoinKind kind); + Status allJoin(); + std::optional handleAsofJoinState(); + Status asofJoin(); + + MutableColumns getEmptyResultColumns() const; Chunk createBlockWithDefaults(size_t source_num); Chunk createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const; @@ -246,12 +263,15 @@ private: std::unordered_map left_to_right_key_remap; std::array cursors; + ASOFJoinInequality asof_inequality = ASOFJoinInequality::None; - /// Keep some state to make connection between data in different blocks + /// Keep some state to make handle data from different blocks AnyJoinState any_join_state; std::unique_ptr all_join_state; + AsofJoinState asof_join_state; - JoinPtr table_join; + JoinKind kind; + JoinStrictness strictness; size_t max_block_size; int null_direction_hint = 1; @@ -281,12 +301,21 @@ public: size_t max_block_size, UInt64 limit_hint = 0); + MergeJoinTransform( + JoinKind kind_, + JoinStrictness strictness_, + const TableJoin::JoinOnClause & on_clause_, + const Blocks & input_headers, + const Block & output_header, + size_t max_block_size, + UInt64 limit_hint_ = 0); + String getName() const override { return "MergeJoinTransform"; } + void setAsofInequality(ASOFJoinInequality asof_inequality_) { algorithm.setAsofInequality(asof_inequality_); } + protected: void onFinish() override; - - LoggerPtr log; }; } diff --git a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp index ce5992c2548..364d7c69071 100644 --- a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp +++ b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp @@ -50,7 +50,7 @@ TEST(Processors, PortsNotConnected) processors->emplace_back(std::move(source)); processors->emplace_back(std::move(sink)); -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD try { QueryStatusPtr element; diff --git a/src/Processors/tests/gtest_full_sorting_join.cpp b/src/Processors/tests/gtest_full_sorting_join.cpp new file mode 100644 index 00000000000..f678d7984e8 --- /dev/null +++ b/src/Processors/tests/gtest_full_sorting_join.cpp @@ -0,0 +1,768 @@ +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + + +#include + +using namespace DB; + +namespace +{ + +QueryPipeline buildJoinPipeline( + std::shared_ptr left_source, + std::shared_ptr right_source, + size_t key_length = 1, + JoinKind kind = JoinKind::Inner, + JoinStrictness strictness = 
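/// Test helper sketch: buildJoinPipeline wires two Source processors straight
/// into a MergeJoinTransform and returns a pullable QueryPipeline; the first
/// `key_length` columns of each input header become the join keys, and output
/// columns are prefixed "t1."/"t2." to keep the two sides distinguishable.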
JoinStrictness::All, + ASOFJoinInequality asof_inequality = ASOFJoinInequality::None) +{ + Blocks inputs; + inputs.emplace_back(left_source->getPort().getHeader()); + inputs.emplace_back(right_source->getPort().getHeader()); + + Block out_header; + for (const auto & input : inputs) + { + for (ColumnWithTypeAndName column : input) + { + if (&input == &inputs.front()) + column.name = "t1." + column.name; + else + column.name = "t2." + column.name; + out_header.insert(column); + } + } + + TableJoin::JoinOnClause on_clause; + for (size_t i = 0; i < key_length; ++i) + { + on_clause.key_names_left.emplace_back(inputs[0].getByPosition(i).name); + on_clause.key_names_right.emplace_back(inputs[1].getByPosition(i).name); + } + + auto joining = std::make_shared( + kind, + strictness, + on_clause, + inputs, out_header, /* max_block_size = */ 0); + + if (asof_inequality != ASOFJoinInequality::None) + joining->setAsofInequality(asof_inequality); + + chassert(joining->getInputs().size() == 2); + + connect(left_source->getPort(), joining->getInputs().front()); + connect(right_source->getPort(), joining->getInputs().back()); + + auto * output_port = &joining->getOutputPort(); + + auto processors = std::make_shared(); + processors->emplace_back(std::move(left_source)); + processors->emplace_back(std::move(right_source)); + processors->emplace_back(std::move(joining)); + + QueryPipeline pipeline(QueryPlanResourceHolder{}, processors, output_port); + return pipeline; +} + + +std::shared_ptr oneColumnSource(const std::vector> & values) +{ + Block header = { + ColumnWithTypeAndName(std::make_shared(), "key"), + ColumnWithTypeAndName(std::make_shared(), "idx"), + }; + + UInt64 idx = 0; + Chunks chunks; + for (const auto & chunk_values : values) + { + auto key_column = ColumnUInt64::create(); + auto idx_column = ColumnUInt64::create(); + + for (auto n : chunk_values) + { + key_column->insertValue(n); + idx_column->insertValue(idx); + ++idx; + } + chunks.emplace_back(Chunk(Columns{std::move(key_column), std::move(idx_column)}, chunk_values.size())); + } + return std::make_shared(header, std::move(chunks)); +} + +class SourceChunksBuilder +{ +public: + + explicit SourceChunksBuilder(const Block & header_) + : header(header_) + { + current_chunk = header.cloneEmptyColumns(); + chassert(!current_chunk.empty()); + } + + void setBreakProbability(pcg64 & rng_) + { + /// random probability with possibility to have exact 0.0 and 1.0 values + break_prob = std::uniform_int_distribution(0, 5)(rng_) / static_cast(5); + rng = &rng_; + } + + void addRow(const std::vector & row) + { + chassert(row.size() == current_chunk.size()); + for (size_t i = 0; i < current_chunk.size(); ++i) + current_chunk[i]->insert(row[i]); + + if (rng && std::uniform_real_distribution<>(0.0, 1.0)(*rng) < break_prob) + addChunk(); + } + + void addChunk() + { + if (current_chunk.front()->empty()) + return; + + size_t rows = current_chunk.front()->size(); + chunks.emplace_back(std::move(current_chunk), rows); + current_chunk = header.cloneEmptyColumns(); + } + + std::shared_ptr getSource() + { + addChunk(); + + /// copy chunk to allow reusing same builder + Chunks chunks_copy; + chunks_copy.reserve(chunks.size()); + for (const auto & chunk : chunks) + chunks_copy.emplace_back(chunk.clone()); + return std::make_shared(header, std::move(chunks_copy)); + } + +private: + Block header; + Chunks chunks; + MutableColumns current_chunk; + + pcg64 * rng = nullptr; + double break_prob = 0.0; +}; + + +std::vector> getValuesFromBlock(const Block & block, const Names 
& names) +{ + std::vector<std::vector<Field>> result; + for (size_t i = 0; i < block.rows(); ++i) + { + auto & row = result.emplace_back(); + for (const auto & name : names) + block.getByName(name).column->get(i, row.emplace_back()); + } + return result; +} + + +Block executePipeline(QueryPipeline && pipeline) +{ + PullingPipelineExecutor executor(pipeline); + + Blocks result_blocks; + while (true) + { + Block block; + bool is_ok = executor.pull(block); + if (!is_ok) + break; + result_blocks.emplace_back(std::move(block)); + } + + return concatenateBlocks(result_blocks); +} + +template <typename T> +void assertColumnVectorEq(const typename ColumnVector<T>::Container & expected, const Block & block, const std::string & name) +{ + const auto * actual = typeid_cast<const ColumnVector<T> *>(block.getByName(name).column.get()); + ASSERT_TRUE(actual) << "unexpected column type: " << block.getByName(name).column->dumpStructure() << ", expected: " << typeid(ColumnVector<T>).name(); + + auto get_first_diff = [&]() -> String + { + const auto & actual_data = actual->getData(); + size_t num_rows = std::min(expected.size(), actual_data.size()); + for (size_t i = 0; i < num_rows; ++i) + { + if (expected[i] != actual_data[i]) + return fmt::format(", expected: {}, actual: {} at row {}", expected[i], actual_data[i], i); + } + return ""; + }; + + EXPECT_EQ(actual->getData().size(), expected.size()); + ASSERT_EQ(actual->getData(), expected) << "column name: " << name << get_first_diff(); +} + +template <typename T> +void assertColumnEq(const IColumn & expected, const Block & block, const std::string & name) +{ + const ColumnPtr & actual = block.getByName(name).column; + ASSERT_TRUE(checkColumn<T>(*actual)); + ASSERT_TRUE(checkColumn<T>(expected)); + EXPECT_EQ(actual->size(), expected.size()); + + auto dump_val = [](const IColumn & col, size_t i) -> String + { + Field value; + col.get(i, value); + return value.dump(); + }; + + size_t num_rows = std::min(actual->size(), expected.size()); + for (size_t i = 0; i < num_rows; ++i) + ASSERT_EQ(actual->compareAt(i, i, expected, 1), 0) << dump_val(*actual, i) << " != " << dump_val(expected, i) << " at row " << i; +} + +template <typename T> +T getRandomFrom(pcg64 & rng, const std::initializer_list<T> & opts) +{ + std::vector<T> options(opts.begin(), opts.end()); + size_t idx = std::uniform_int_distribution<size_t>(0, options.size() - 1)(rng); + return options[idx]; +} + +void generateNextKey(pcg64 & rng, UInt64 & k1, String & k2) +{ + size_t str_len = std::uniform_int_distribution<>(1, 10)(rng); + String new_k2 = getRandomASCIIString(str_len, rng); + if (new_k2.compare(k2) <= 0) + ++k1; + k2 = new_k2; +} + +bool isStrict(ASOFJoinInequality inequality) +{ + return inequality == ASOFJoinInequality::Less || inequality == ASOFJoinInequality::Greater; +} + +} + +class FullSortingJoinTest : public ::testing::Test +{ +public: + FullSortingJoinTest() = default; + + void SetUp() override + { + Poco::AutoPtr<Poco::ConsoleChannel> channel(new Poco::ConsoleChannel(std::cerr)); + Poco::Logger::root().setChannel(channel); + if (const char * test_log_level = std::getenv("TEST_LOG_LEVEL")) // NOLINT(concurrency-mt-unsafe) + Poco::Logger::root().setLevel(test_log_level); + else + Poco::Logger::root().setLevel("none"); + + + UInt64 seed = randomSeed(); + if (const char * random_seed = std::getenv("TEST_RANDOM_SEED")) // NOLINT(concurrency-mt-unsafe) + seed = std::stoull(random_seed); + std::cout << "TEST_RANDOM_SEED=" << seed << std::endl; + rng = pcg64(seed); + } + + void TearDown() override + { + } + + pcg64 rng; +}; + +TEST_F(FullSortingJoinTest, AllAnyOneKey) +try +{ + { + SCOPED_TRACE("Inner All"); + Block 
result = executePipeline(buildJoinPipeline( + oneColumnSource({ {1, 2, 3, 4, 5} }), + oneColumnSource({ {1}, {2}, {3}, {4}, {5} }), + 1, JoinKind::Inner, JoinStrictness::All)); + + assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t1.idx"); + assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t2.idx"); + } + { + SCOPED_TRACE("Inner Any"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {1, 2, 3, 4, 5} }), + oneColumnSource({ {1}, {2}, {3}, {4}, {5} }), + 1, JoinKind::Inner, JoinStrictness::Any)); + assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t1.idx"); + assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t2.idx"); + } + { + SCOPED_TRACE("Inner All"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }), + oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }), + 1, JoinKind::Inner, JoinStrictness::All)); + assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 0, 1, 2, 3, 3, 4, 5}), result, "t1.idx"); + assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 3, 3, 4, 4, 4, 3, 4, 5, 5}), result, "t2.idx"); + } + { + SCOPED_TRACE("Inner Any"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }), + oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }), + 1, JoinKind::Inner, JoinStrictness::Any)); + assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 4}), result, "t1.idx"); + assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 5}), result, "t2.idx"); + } + { + SCOPED_TRACE("Inner Any"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {2, 2, 2, 2}, {3}, {3, 5} }), + oneColumnSource({ {1, 1, 1, 2}, {2}, {3, 4} }), + 1, JoinKind::Inner, JoinStrictness::Any)); + assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 4}), result, "t1.idx"); + assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 5}), result, "t2.idx"); + } + { + + SCOPED_TRACE("Left Any"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }), + oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }), + 1, JoinKind::Left, JoinStrictness::Any)); + assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4, 5, 6}), result, "t1.idx"); + assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 3, 3, 3, 5, 5, 0}), result, "t2.idx"); + } + { + SCOPED_TRACE("Left Any"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {2, 2, 2, 2}, {3}, {3, 5} }), + oneColumnSource({ {1, 1, 1, 2}, {2}, {3, 4} }), + 1, JoinKind::Left, JoinStrictness::Any)); + assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4, 5, 6}), result, "t1.idx"); + assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 3, 3, 3, 5, 5, 0}), result, "t2.idx"); + } +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} + + +TEST_F(FullSortingJoinTest, AnySimple) +try +{ + JoinKind kind = getRandomFrom(rng, {JoinKind::Inner, JoinKind::Left, JoinKind::Right}); + + SourceChunksBuilder left_source({ + {std::make_shared<DataTypeUInt64>(), "k1"}, + {std::make_shared<DataTypeString>(), "k2"}, + {std::make_shared<DataTypeString>(), "attr"}, + }); + + SourceChunksBuilder right_source({ + {std::make_shared<DataTypeUInt64>(), "k1"}, + {std::make_shared<DataTypeString>(), "k2"}, + {std::make_shared<DataTypeString>(), "attr"}, + }); + + left_source.setBreakProbability(rng); + right_source.setBreakProbability(rng); + + size_t num_keys = std::uniform_int_distribution<>(100, 1000)(rng); + + auto expected_left = ColumnString::create(); + auto expected_right = ColumnString::create(); + + UInt64 k1 = 1; + String 
k2; + + auto get_attr = [&](const String & side, size_t idx) -> String + { + return toString(k1) + "_" + k2 + "_" + side + "_" + toString(idx); + }; + + for (size_t i = 0; i < num_keys; ++i) + { + generateNextKey(rng, k1, k2); + + /// The key is present in the left table, the right table, or both; presence in both is the most probable. + size_t key_presence = std::uniform_int_distribution<>(0, 10)(rng); + + size_t num_rows_left = key_presence == 0 ? 0 : std::uniform_int_distribution<>(1, 10)(rng); + for (size_t j = 0; j < num_rows_left; ++j) + left_source.addRow({k1, k2, get_attr("left", j)}); + + size_t num_rows_right = key_presence == 1 ? 0 : std::uniform_int_distribution<>(1, 10)(rng); + for (size_t j = 0; j < num_rows_right; ++j) + right_source.addRow({k1, k2, get_attr("right", j)}); + + String left_attr = num_rows_left ? get_attr("left", 0) : ""; + String right_attr = num_rows_right ? get_attr("right", 0) : ""; + + if (kind == JoinKind::Inner && num_rows_left && num_rows_right) + { + expected_left->insert(left_attr); + expected_right->insert(right_attr); + } + else if (kind == JoinKind::Left) + { + for (size_t j = 0; j < num_rows_left; ++j) + { + expected_left->insert(get_attr("left", j)); + expected_right->insert(right_attr); + } + } + else if (kind == JoinKind::Right) + { + for (size_t j = 0; j < num_rows_right; ++j) + { + expected_left->insert(left_attr); + expected_right->insert(get_attr("right", j)); + } + } + } + + Block result_block = executePipeline(buildJoinPipeline( + left_source.getSource(), right_source.getSource(), /* key_length = */ 2, + kind, JoinStrictness::Any)); + assertColumnEq<ColumnString>(*expected_left, result_block, "t1.attr"); + assertColumnEq<ColumnString>(*expected_right, result_block, "t2.attr"); +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} + +TEST_F(FullSortingJoinTest, AsofSimple) +try +{ + SourceChunksBuilder left_source({ + {std::make_shared<DataTypeString>(), "key"}, + {std::make_shared<DataTypeUInt64>(), "t"}, + }); + left_source.addRow({"AMZN", 3}); + left_source.addRow({"AMZN", 4}); + left_source.addRow({"AMZN", 6}); + left_source.addRow({"SBUX", 10}); + + SourceChunksBuilder right_source({ + {std::make_shared<DataTypeString>(), "key"}, + {std::make_shared<DataTypeUInt64>(), "t"}, + {std::make_shared<DataTypeUInt64>(), "value"}, + }); + right_source.addRow({"AAPL", 1, 97}); + right_source.addChunk(); + right_source.addRow({"AAPL", 2, 98}); + right_source.addRow({"AAPL", 3, 99}); + right_source.addRow({"AMZN", 1, 100}); + right_source.addRow({"AMZN", 2, 110}); + right_source.addChunk(); + right_source.addRow({"AMZN", 2, 110}); + right_source.addChunk(); + right_source.addRow({"AMZN", 4, 130}); + right_source.addRow({"AMZN", 5, 140}); + right_source.addRow({"SBUX", 8, 180}); + right_source.addChunk(); + right_source.addRow({"SBUX", 9, 190}); + + { + Block result_block = executePipeline(buildJoinPipeline( + left_source.getSource(), right_source.getSource(), /* key_length = */ 2, + JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals)); + auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"}); + + ASSERT_EQ(values, (std::vector<std::vector<Field>>{ + {"AMZN", 3u, 4u, 130u}, + {"AMZN", 4u, 4u, 130u}, + })); + } + + { + Block result_block = executePipeline(buildJoinPipeline( + left_source.getSource(), right_source.getSource(), /* key_length = */ 2, + JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::GreaterOrEquals)); + auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"}); + + ASSERT_EQ(values, (std::vector<std::vector<Field>>{ + {"AMZN", 3u, 2u, 110u}, + {"AMZN", 4u, 4u, 130u}, + 
{"AMZN", 6u, 5u, 140u}, + {"SBUX", 10u, 9u, 190u}, + })); + } +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} + + +TEST_F(FullSortingJoinTest, AsofOnlyColumn) +try +{ + auto left_source = oneColumnSource({ {3}, {3, 3, 3}, {3, 5, 5, 6}, {9, 9}, {10, 20} }); + + SourceChunksBuilder right_source_builder({ + {std::make_shared(), "t"}, + {std::make_shared(), "value"}, + }); + + right_source_builder.setBreakProbability(rng); + + for (const auto & row : std::vector>{ {1, 101}, {2, 102}, {4, 104}, {5, 105}, {11, 111}, {15, 115} }) + right_source_builder.addRow(row); + + auto right_source = right_source_builder.getSource(); + + auto pipeline = buildJoinPipeline( + left_source, right_source, /* key_length = */ 1, + JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals); + + Block result_block = executePipeline(std::move(pipeline)); + + ASSERT_EQ( + assert_cast(result_block.getByName("t1.key").column.get())->getData(), + (ColumnUInt64::Container{3, 3, 3, 3, 3, 5, 5, 6, 9, 9, 10}) + ); + + ASSERT_EQ( + assert_cast(result_block.getByName("t2.t").column.get())->getData(), + (ColumnUInt64::Container{4, 4, 4, 4, 4, 5, 5, 11, 11, 11, 11}) + ); + + ASSERT_EQ( + assert_cast(result_block.getByName("t2.value").column.get())->getData(), + (ColumnUInt64::Container{104, 104, 104, 104, 104, 105, 105, 111, 111, 111, 111}) + ); +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} + +TEST_F(FullSortingJoinTest, AsofLessGeneratedTestData) +try +{ + /// Generate data random and build expected result at the same time. + + /// Test specific combinations of join kind and inequality per each run + auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left }); + auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Less, ASOFJoinInequality::LessOrEquals }); + + SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality)); + + /// Key is complex, `k1, k2` for equality and `t` for asof + SourceChunksBuilder left_source_builder({ + {std::make_shared(), "k1"}, + {std::make_shared(), "k2"}, + {std::make_shared(), "t"}, + {std::make_shared(), "attr"}, + }); + + SourceChunksBuilder right_source_builder({ + {std::make_shared(), "k1"}, + {std::make_shared(), "k2"}, + {std::make_shared(), "t"}, + {std::make_shared(), "attr"}, + }); + + /// How small generated block should be + left_source_builder.setBreakProbability(rng); + right_source_builder.setBreakProbability(rng); + + /// We are going to generate sorted data and remember expected result + ColumnInt64::Container expected; + + UInt64 k1 = 1; + String k2; + auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng); + for (size_t key_num = 0; key_num < key_num_total; ++key_num) + { + /// Generate new key greater than previous + generateNextKey(rng, k1, k2); + + Int64 left_t = 0; + /// Generate several rows for the key + size_t num_left_rows = std::uniform_int_distribution<>(1, 100)(rng); + for (size_t i = 0; i < num_left_rows; ++i) + { + /// t is strictly greater than previous + left_t += std::uniform_int_distribution<>(1, 10)(rng); + + auto left_arrtibute_value = 10 * left_t; + left_source_builder.addRow({k1, k2, left_t, left_arrtibute_value}); + expected.push_back(left_arrtibute_value); + + auto num_matches = 1 + std::poisson_distribution<>(4)(rng); + /// Generate several matches in the right table + auto right_t = left_t; + for (size_t j = 0; j < num_matches; ++j) + { + int min_step = isStrict(asof_inequality) ? 
1 : 0; + right_t += std::uniform_int_distribution<>(min_step, 3)(rng); + + /// First row should match + bool is_match = j == 0; + right_source_builder.addRow({k1, k2, right_t, is_match ? 10 * left_attribute_value : -1}); + } + /// The next left_t should be greater than right_t so that it does not match the previous rows + left_t = right_t; + } + + /// Generate some rows with greater left_t to check that they are not matched + num_left_rows = std::bernoulli_distribution(0.5)(rng) ? std::uniform_int_distribution<>(1, 100)(rng) : 0; + for (size_t i = 0; i < num_left_rows; ++i) + { + left_t += std::uniform_int_distribution<>(1, 10)(rng); + left_source_builder.addRow({k1, k2, left_t, -10 * left_t}); + + if (join_kind == JoinKind::Left) + expected.push_back(-10 * left_t); + } + } + + Block result_block = executePipeline(buildJoinPipeline( + left_source_builder.getSource(), right_source_builder.getSource(), + /* key_length = */ 3, + join_kind, JoinStrictness::Asof, asof_inequality)); + + assertColumnVectorEq<Int64>(expected, result_block, "t1.attr"); + + for (auto & e : expected) + /// Non-matched rows from the left table have a negative attr. + /// The value of the attribute in the right table is 10 times greater than in the left table. + e = e < 0 ? 0 : 10 * e; + + assertColumnVectorEq<Int64>(expected, result_block, "t2.attr"); +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} + +TEST_F(FullSortingJoinTest, AsofGreaterGeneratedTestData) +try +{ + /// Generate random data and build the expected result at the same time. + + /// Test a specific combination of join kind and inequality on each run + auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left }); + auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Greater, ASOFJoinInequality::GreaterOrEquals }); + + SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality)); + + SourceChunksBuilder left_source_builder({ + {std::make_shared<DataTypeUInt64>(), "k1"}, + {std::make_shared<DataTypeString>(), "k2"}, + {std::make_shared<DataTypeInt64>(), "t"}, + {std::make_shared<DataTypeInt64>(), "attr"}, + }); + + SourceChunksBuilder right_source_builder({ + {std::make_shared<DataTypeUInt64>(), "k1"}, + {std::make_shared<DataTypeString>(), "k2"}, + {std::make_shared<DataTypeInt64>(), "t"}, + {std::make_shared<DataTypeInt64>(), "attr"}, + }); + + left_source_builder.setBreakProbability(rng); + right_source_builder.setBreakProbability(rng); + + ColumnInt64::Container expected; + + UInt64 k1 = 1; + String k2; + UInt64 left_t = 0; + + auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng); + for (size_t key_num = 0; key_num < key_num_total; ++key_num) + { + /// Generate a new key greater than the previous one + generateNextKey(rng, k1, k2); + + /// Generate some rows with smaller left_t to check that they are not matched + size_t num_left_rows = std::bernoulli_distribution(0.5)(rng) ? std::uniform_int_distribution<>(1, 100)(rng) : 0; + for (size_t i = 0; i < num_left_rows; ++i) + { + left_t += std::uniform_int_distribution<>(1, 10)(rng); + left_source_builder.addRow({k1, k2, left_t, -10 * left_t}); + + if (join_kind == JoinKind::Left) + expected.push_back(-10 * left_t); + } + + if (std::bernoulli_distribution(0.1)(rng)) + continue; + + size_t num_right_matches = std::uniform_int_distribution<>(1, 10)(rng); + auto right_t = left_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 0 : 1, 10)(rng); + auto attribute_value = 10 * right_t; + for (size_t j = 0; j < num_right_matches; ++j) + { + right_t += std::uniform_int_distribution<>(0, 3)(rng); + bool is_match = j == num_right_matches - 1; + right_source_builder.addRow({k1, k2, right_t, is_match ? 
10 * attribute_value : -1}); + } + + /// The next left_t should be greater than (or equal to) right_t to match the previous rows + left_t = right_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 1 : 0, 100)(rng); + size_t num_left_matches = std::uniform_int_distribution<>(1, 100)(rng); + for (size_t j = 0; j < num_left_matches; ++j) + { + left_t += std::uniform_int_distribution<>(0, 3)(rng); + left_source_builder.addRow({k1, k2, left_t, attribute_value}); + expected.push_back(attribute_value); + } + } + + Block result_block = executePipeline(buildJoinPipeline( + left_source_builder.getSource(), right_source_builder.getSource(), + /* key_length = */ 3, + join_kind, JoinStrictness::Asof, asof_inequality)); + + assertColumnVectorEq<Int64>(expected, result_block, "t1.attr"); + + for (auto & e : expected) + /// Non-matched rows from the left table have a negative attr. + /// The value of the attribute in the right table is 10 times greater than in the left table. + e = e < 0 ? 0 : 10 * e; + + assertColumnVectorEq<Int64>(expected, result_block, "t2.attr"); +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} diff --git a/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp b/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp index c8ab2e3a973..34bc2eb2b5e 100644 --- a/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp +++ b/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp @@ -133,7 +133,7 @@ TEST(CheckSortedTransform, CheckBadLastRow) EXPECT_NO_THROW(executor.pull(chunk)); EXPECT_NO_THROW(executor.pull(chunk)); -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW(executor.pull(chunk), DB::Exception); #endif } @@ -158,7 +158,7 @@ TEST(CheckSortedTransform, CheckUnsortedBlock1) Chunk chunk; -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW(executor.pull(chunk), DB::Exception); #endif } @@ -181,7 +181,7 @@ TEST(CheckSortedTransform, CheckUnsortedBlock2) PullingPipelineExecutor executor(pipeline); Chunk chunk; -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW(executor.pull(chunk), DB::Exception); #endif } @@ -204,7 +204,7 @@ TEST(CheckSortedTransform, CheckUnsortedBlock3) PullingPipelineExecutor executor(pipeline); Chunk chunk; -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW(executor.pull(chunk), DB::Exception); #endif } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 07366d7cc07..5bc2d09df35 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -666,7 +666,7 @@ void TCPHandler::runImpl() // Server should die on std logic errors in debug, like with assert() // or ErrorCodes::LOGICAL_ERROR. This helps catch these errors in // tests. -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD catch (const std::logic_error & e) { state.io.onException(); diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index 857cfd78910..13394690227 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -357,7 +357,7 @@ void RefreshTask::refreshTask() stop_requested = true; tryLogCurrentException(log, "Unexpected exception in refresh scheduling, please investigate. 
The view will be stopped."); -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD abortOnFailedAssertion("Unexpected exception in refresh scheduling"); #endif } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 57d95a98f11..b603d0ecf87 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -93,11 +93,6 @@ StorageMaterializedView::StorageMaterializedView( { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); - auto * storage_def = query.storage; - if (storage_def && storage_def->primary_key) - storage_metadata.primary_key = KeyDescription::getKeyFromAST(storage_def->primary_key->ptr(), - storage_metadata.columns, - local_context->getGlobalContext()); if (query.sql_security) storage_metadata.setSQLSecurity(query.sql_security->as()); @@ -110,12 +105,21 @@ StorageMaterializedView::StorageMaterializedView( throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); /// If the destination table is not set, use inner table - has_inner_table = query.to_table_id.empty(); - if (has_inner_table && !query.storage) + auto to_table_id = query.getTargetTableID(ViewTarget::To); + has_inner_table = to_table_id.empty(); + auto to_inner_uuid = query.getTargetInnerUUID(ViewTarget::To); + auto to_table_engine = query.getTargetInnerEngine(ViewTarget::To); + + if (has_inner_table && !to_table_engine) throw Exception(ErrorCodes::INCORRECT_QUERY, "You must specify where to save results of a MaterializedView query: " "either ENGINE or an existing table in a TO clause"); + if (to_table_engine && to_table_engine->primary_key) + storage_metadata.primary_key = KeyDescription::getKeyFromAST(to_table_engine->primary_key->ptr(), + storage_metadata.columns, + local_context->getGlobalContext()); + auto select = SelectQueryDescription::getSelectQueryFromASTForMatView(query.select->clone(), query.refresh_strategy != nullptr, local_context); if (select.select_table_id) { @@ -135,25 +139,25 @@ StorageMaterializedView::StorageMaterializedView( setInMemoryMetadata(storage_metadata); - bool point_to_itself_by_uuid = has_inner_table && query.to_inner_uuid != UUIDHelpers::Nil - && query.to_inner_uuid == table_id_.uuid; - bool point_to_itself_by_name = !has_inner_table && query.to_table_id.database_name == table_id_.database_name - && query.to_table_id.table_name == table_id_.table_name; + bool point_to_itself_by_uuid = has_inner_table && to_inner_uuid != UUIDHelpers::Nil + && to_inner_uuid == table_id_.uuid; + bool point_to_itself_by_name = !has_inner_table && to_table_id.database_name == table_id_.database_name + && to_table_id.table_name == table_id_.table_name; if (point_to_itself_by_uuid || point_to_itself_by_name) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Materialized view {} cannot point to itself", table_id_.getFullTableName()); if (!has_inner_table) { - target_table_id = query.to_table_id; + target_table_id = to_table_id; } else if (LoadingStrictnessLevel::ATTACH <= mode) { /// If there is an ATTACH request, then the internal table must already be created. 
- target_table_id = StorageID(getStorageID().database_name, generateInnerTableName(getStorageID()), query.to_inner_uuid); + target_table_id = StorageID(getStorageID().database_name, generateInnerTableName(getStorageID()), to_inner_uuid); } else { - const String & engine = query.storage->engine->name; + const String & engine = to_table_engine->engine->name; const auto & storage_features = StorageFactory::instance().getStorageFeatures(engine); /// We will create a query to create an internal table. @@ -161,8 +165,8 @@ StorageMaterializedView::StorageMaterializedView( auto manual_create_query = std::make_shared<ASTCreateQuery>(); manual_create_query->setDatabase(getStorageID().database_name); manual_create_query->setTable(generateInnerTableName(getStorageID())); - manual_create_query->uuid = query.to_inner_uuid; - manual_create_query->has_uuid = query.to_inner_uuid != UUIDHelpers::Nil; + manual_create_query->uuid = to_inner_uuid; + manual_create_query->has_uuid = to_inner_uuid != UUIDHelpers::Nil; auto new_columns_list = std::make_shared<ASTColumns>(); new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); @@ -184,7 +188,9 @@ StorageMaterializedView::StorageMaterializedView( } manual_create_query->set(manual_create_query->columns_list, new_columns_list); - manual_create_query->set(manual_create_query->storage, query.storage->ptr()); + + if (to_table_engine) + manual_create_query->set(manual_create_query->storage, to_table_engine); InterpreterCreateQuery create_interpreter(manual_create_query, create_context); create_interpreter.setInternal(true); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 72f725965e0..3f02486ed15 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1516,7 +1516,7 @@ static time_t tryGetPartCreateTime(zkutil::ZooKeeperPtr & zookeeper, const Strin void StorageReplicatedMergeTree::paranoidCheckForCoveredPartsInZooKeeperOnStart(const Strings & parts_in_zk, const Strings & parts_to_fetch) const { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD constexpr bool paranoid_check_for_covered_parts_default = true; #else constexpr bool paranoid_check_for_covered_parts_default = false; @@ -2383,7 +2383,7 @@ static void paranoidCheckForCoveredPartsInZooKeeper( const String & covering_part_name, const StorageReplicatedMergeTree & storage) { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD constexpr bool paranoid_check_for_covered_parts_default = true; #else constexpr bool paranoid_check_for_covered_parts_default = false; diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 43b761d84b1..049a442d494 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -484,7 +484,8 @@ protected: if (ast_create && !context->getSettingsRef().show_table_uuid_in_table_create_query_if_not_nil) { ast_create->uuid = UUIDHelpers::Nil; - ast_create->to_inner_uuid = UUIDHelpers::Nil; + if (ast_create->targets) + ast_create->targets->resetInnerUUIDs(); } if (columns_mask[src_index++]) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index e15da0074d5..a39dd4e8f5f 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1209,8 +1209,11 @@ StorageWindowView::StorageWindowView( setInMemoryMetadata(storage_metadata); /// If the target table is 
not set, use inner target table - has_inner_target_table = query.to_table_id.empty(); - if (has_inner_target_table && !query.storage) + auto to_table_id = query.getTargetTableID(ViewTarget::To); + has_inner_target_table = to_table_id.empty(); + auto to_table_engine = query.getTargetInnerEngine(ViewTarget::To); + + if (has_inner_target_table && !to_table_engine) throw Exception(ErrorCodes::INCORRECT_QUERY, "You must specify where to save results of a WindowView query: " "either ENGINE or an existing table in a TO clause"); @@ -1225,12 +1228,12 @@ StorageWindowView::StorageWindowView( auto inner_query = initInnerQuery(query.select->list_of_selects->children.at(0)->as<ASTSelectQuery &>(), context_); - if (query.inner_storage) - inner_table_engine = query.inner_storage->clone(); + if (auto inner_storage = query.getTargetInnerEngine(ViewTarget::Inner)) + inner_table_engine = inner_storage->clone(); inner_table_id = StorageID(getStorageID().database_name, generateInnerTableName(getStorageID())); inner_fetch_query = generateInnerFetchQuery(inner_table_id); - target_table_id = has_inner_target_table ? StorageID(table_id_.database_name, generateTargetTableName(table_id_)) : query.to_table_id; + target_table_id = has_inner_target_table ? StorageID(table_id_.database_name, generateTargetTableName(table_id_)) : to_table_id; if (is_proctime) next_fire_signal = getWindowUpperBound(now()); @@ -1255,7 +1258,7 @@ StorageWindowView::StorageWindowView( new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); target_create_query->set(target_create_query->columns_list, new_columns_list); - target_create_query->set(target_create_query->storage, query.storage->ptr()); + target_create_query->set(target_create_query->storage, to_table_engine); InterpreterCreateQuery create_interpreter_(target_create_query, create_context_); create_interpreter_.setInternal(true); diff --git a/tests/ci/artifactory.py b/tests/ci/artifactory.py index 98a0345c6bd..86dcaf79854 100644 --- a/tests/ci/artifactory.py +++ b/tests/ci/artifactory.py @@ -43,7 +43,6 @@ class R2MountPoint: self.bucket_name = self._PROD_BUCKET_NAME self.aux_mount_options = "" - self.async_mount = False if self.app == MountPointApp.S3FS: self.cache_dir = "/home/ubuntu/s3fs_cache" # self.aux_mount_options += "-o nomodtime " if self.NOMODTIME else "" not for s3fs @@ -57,7 +56,6 @@ class R2MountPoint: self.mount_cmd = f"s3fs {self.bucket_name} {self.MOUNT_POINT} -o url={self.API_ENDPOINT} -o use_path_request_style -o umask=0000 -o nomultipart -o logfile={self.LOG_FILE} {self.aux_mount_options}" elif self.app == MountPointApp.RCLONE: # run rclone mount process asynchronously, otherwise subprocess.run(daemonized command) will not return - self.async_mount = True self.cache_dir = "/home/ubuntu/rclone_cache" self.aux_mount_options += "--no-modtime " if self.NOMODTIME else "" self.aux_mount_options += "-v " if self.DEBUG else "" # -vv too verbose @@ -85,10 +83,12 @@ class R2MountPoint: Shell.run(_UNMOUNT_CMD) Shell.run(_MKDIR_CMD) Shell.run(_MKDIR_FOR_CACHE) + if self.app == MountPointApp.S3FS: + Shell.run(self.mount_cmd, check=True) + else: + # didn't manage to use a simple run() without blocking or failing + Shell.run_as_daemon(self.mount_cmd) + time.sleep(3) Shell.run(_TEST_MOUNT_CMD, check=True) @classmethod @@ -107,6 +107,7 @@ class DebianArtifactory: _PROD_REPO_URL = "https://packages.clickhouse.com/deb" def 
__init__(self, release_info: ReleaseInfo, dry_run: bool): + self.release_info = release_info self.codename = release_info.codename self.version = release_info.version if dry_run: @@ -154,9 +155,8 @@ class DebianArtifactory: print("Running test command:") print(f" {cmd}") Shell.run(cmd, check=True) - release_info = ReleaseInfo.from_file() - release_info.debian_command = debian_command - release_info.dump() + self.release_info.debian_command = debian_command + self.release_info.dump() def _copy_if_not_exists(src: Path, dst: Path) -> Path: @@ -177,6 +177,7 @@ class RpmArtifactory: _SIGN_KEY = "885E2BDCF96B0B45ABF058453E4AD4719DDE9A38" def __init__(self, release_info: ReleaseInfo, dry_run: bool): + self.release_info = release_info self.codename = release_info.codename self.version = release_info.version if dry_run: @@ -230,9 +231,8 @@ class RpmArtifactory: print("Running test command:") print(f" {cmd}") Shell.run(cmd, check=True) - release_info = ReleaseInfo.from_file() - release_info.rpm_command = rpm_command - release_info.dump() + self.release_info.rpm_command = rpm_command + self.release_info.dump() class TgzArtifactory: @@ -240,6 +240,7 @@ class TgzArtifactory: _PROD_REPO_URL = "https://packages.clickhouse.com/tgz" def __init__(self, release_info: ReleaseInfo, dry_run: bool): + self.release_info = release_info self.codename = release_info.codename self.version = release_info.version if dry_run: @@ -290,9 +291,8 @@ class TgzArtifactory: expected_checksum == actual_checksum ), f"[{actual_checksum} != {expected_checksum}]" Shell.run("rm /tmp/tmp.tgz*") - release_info = ReleaseInfo.from_file() - release_info.tgz_command = cmd - release_info.dump() + self.release_info.tgz_command = cmd + self.release_info.dump() def parse_args() -> argparse.Namespace: @@ -340,9 +340,7 @@ def parse_args() -> argparse.Namespace: if __name__ == "__main__": args = parse_args() - assert args.dry_run - release_info = ReleaseInfo.from_file() """ Use S3FS. 
RCLONE has some errors with r2 remote which I didn't figure out how to resolve: ERROR : IO error: NotImplemented: versionId not implemented @@ -350,26 +348,38 @@ if __name__ == "__main__": """ mp = R2MountPoint(MountPointApp.S3FS, dry_run=args.dry_run) if args.export_debian: - with ReleaseContextManager(release_progress=ReleaseProgress.EXPORT_DEB) as _: + with ReleaseContextManager( + release_progress=ReleaseProgress.EXPORT_DEB + ) as release_info: mp.init() DebianArtifactory(release_info, dry_run=args.dry_run).export_packages() mp.teardown() if args.export_rpm: - with ReleaseContextManager(release_progress=ReleaseProgress.EXPORT_RPM) as _: + with ReleaseContextManager( + release_progress=ReleaseProgress.EXPORT_RPM + ) as release_info: mp.init() RpmArtifactory(release_info, dry_run=args.dry_run).export_packages() mp.teardown() if args.export_tgz: - with ReleaseContextManager(release_progress=ReleaseProgress.EXPORT_TGZ) as _: + with ReleaseContextManager( + release_progress=ReleaseProgress.EXPORT_TGZ + ) as release_info: mp.init() TgzArtifactory(release_info, dry_run=args.dry_run).export_packages() mp.teardown() if args.test_debian: - with ReleaseContextManager(release_progress=ReleaseProgress.TEST_DEB) as _: + with ReleaseContextManager( + release_progress=ReleaseProgress.TEST_DEB + ) as release_info: DebianArtifactory(release_info, dry_run=args.dry_run).test_packages() if args.test_tgz: - with ReleaseContextManager(release_progress=ReleaseProgress.TEST_TGZ) as _: + with ReleaseContextManager( + release_progress=ReleaseProgress.TEST_TGZ + ) as release_info: TgzArtifactory(release_info, dry_run=args.dry_run).test_packages() if args.test_rpm: - with ReleaseContextManager(release_progress=ReleaseProgress.TEST_RPM) as _: + with ReleaseContextManager( + release_progress=ReleaseProgress.TEST_RPM + ) as release_info: RpmArtifactory(release_info, dry_run=args.dry_run).test_packages() diff --git a/tests/ci/auto_release.py b/tests/ci/auto_release.py index 39ab3156c80..f2386fe207f 100644 --- a/tests/ci/auto_release.py +++ b/tests/ci/auto_release.py @@ -191,7 +191,7 @@ def main(): title=f"Auto Release Status for {release_info.release_branch}", body=release_info.to_dict(), ) - if args.post_auto_release_complete: + elif args.post_auto_release_complete: assert args.wf_status, "--wf-status Required with --post-auto-release-complete" if args.wf_status != SUCCESS: CIBuddy(dry_run=False).post_job_error( diff --git a/tests/ci/ci_buddy.py b/tests/ci/ci_buddy.py index 3eba5532e66..688c7d59988 100644 --- a/tests/ci/ci_buddy.py +++ b/tests/ci/ci_buddy.py @@ -1,3 +1,4 @@ +import argparse import json import os from typing import Union, Dict @@ -7,7 +8,7 @@ import requests from botocore.exceptions import ClientError from pr_info import PRInfo -from ci_utils import Shell +from ci_utils import Shell, GHActions class CIBuddy: @@ -29,6 +30,11 @@ class CIBuddy: self.commit_url = pr_info.commit_html_url self.sha = pr_info.sha[:10] + def check_workflow(self): + res = GHActions.get_workflow_job_result(GHActions.ActionsNames.RunConfig) + if res != GHActions.ActionStatuses.SUCCESS: + self.post_job_error("Workflow Configuration Failed", critical=True) + @staticmethod def _get_webhooks(): name = "ci_buddy_web_hooks" @@ -139,7 +145,30 @@ class CIBuddy: self.post(message) +def parse_args(): + parser = argparse.ArgumentParser("CI Buddy bot notifies about CI events") + parser.add_argument( + "--check-wf-status", + action="store_true", + help="Checks workflow status", + ) + parser.add_argument( + "--test", + action="store_true", + 
help="for test and debug", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="dry run mode", + ) + return parser.parse_args(), parser + + if __name__ == "__main__": - # test - buddy = CIBuddy(dry_run=True) - buddy.post_job_error("TEst") + args, parser = parse_args() + + if args.test: + CIBuddy(dry_run=True).post_job_error("TEst") + elif args.check_wf_status: + CIBuddy(dry_run=args.dry_run).check_workflow() diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index efbf014cd52..1963e3f39d0 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -1,3 +1,4 @@ +import json import os import re import subprocess @@ -11,6 +12,9 @@ import requests class Envs: GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse") + WORKFLOW_RESULT_FILE = os.getenv( + "WORKFLOW_RESULT_FILE", "/tmp/workflow_results.json" + ) LABEL_CATEGORIES = { @@ -79,6 +83,29 @@ def normalize_string(string: str) -> str: class GHActions: + class ActionsNames: + RunConfig = "RunConfig" + + class ActionStatuses: + ERROR = "error" + FAILURE = "failure" + PENDING = "pending" + SUCCESS = "success" + + @staticmethod + def get_workflow_job_result(wf_job_name: str) -> Optional[str]: + if not Path(Envs.WORKFLOW_RESULT_FILE).exists(): + print( + f"ERROR: Failed to get workflow results from file [{Envs.WORKFLOW_RESULT_FILE}]" + ) + return None + with open(Envs.WORKFLOW_RESULT_FILE, "r", encoding="utf-8") as json_file: + res = json.load(json_file) + if wf_job_name in res: + return res[wf_job_name]["result"] # type: ignore + else: + return None + @staticmethod def print_in_group(group_name: str, lines: Union[Any, List[Any]]) -> None: lines = list(lines) @@ -182,10 +209,11 @@ class Shell: check=False, ) if result.returncode == 0: + print(f"stdout: {result.stdout.strip()}") res = result.stdout else: print( - f"ERROR: stdout {result.stdout.strip()}, stderr {result.stderr.strip()}" + f"ERROR: stdout: {result.stdout.strip()}, stderr: {result.stderr.strip()}" ) if check: assert result.returncode == 0 diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index 4347cfebb54..a0b4083b673 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -43,6 +43,7 @@ class ReleaseProgress: TEST_TGZ = "test TGZ packages" TEST_RPM = "test RPM packages" TEST_DEB = "test DEB packages" + COMPLETED = "completed" class ReleaseProgressDescription: @@ -108,6 +109,12 @@ class ReleaseInfo: release_progress: str = "" progress_description: str = "" + def is_patch(self): + return self.release_branch != "master" + + def is_new_release_branch(self): + return self.release_branch == "master" + @staticmethod def from_file() -> "ReleaseInfo": with open(RELEASE_INFO_FILE, "r", encoding="utf-8") as json_file: @@ -126,12 +133,12 @@ class ReleaseInfo: release_tag = None previous_release_tag = None previous_release_sha = None - codename = None + codename = "" assert release_type in ("patch", "new") if release_type == "new": # check commit_ref is right and on a right branch Shell.run( - f"git merge-base --is-ancestor origin/{commit_ref} origin/master", + f"git merge-base --is-ancestor {commit_ref} origin/master", check=True, ) with checkout(commit_ref): @@ -146,9 +153,6 @@ class ReleaseInfo: git.latest_tag == expected_prev_tag ), f"BUG: latest tag [{git.latest_tag}], expected [{expected_prev_tag}]" release_tag = version.describe - codename = ( - VersionType.STABLE - ) # dummy value (artifactory won't be updated for new release) previous_release_tag = expected_prev_tag previous_release_sha = 
Shell.run_strict( f"git rev-parse {previous_release_tag}" @@ -205,7 +209,7 @@ class ReleaseInfo: and commit_sha and release_tag and version - and codename in ("lts", "stable") + and (codename in ("lts", "stable") or release_type == "new") ) self.release_branch = release_branch @@ -320,24 +324,27 @@ class ReleaseInfo: Shell.run( f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'" ) - self.version_bump_pr = GHActions.get_pr_url_by_branch( - repo=GITHUB_REPOSITORY, branch=branch_upd_version_contributors - ) + self.version_bump_pr = "dry-run" + else: + self.version_bump_pr = GHActions.get_pr_url_by_branch( + repo=GITHUB_REPOSITORY, branch=branch_upd_version_contributors + ) def update_release_info(self, dry_run: bool) -> "ReleaseInfo": - branch = f"auto/{release_info.release_tag}" - if not dry_run: - url = GHActions.get_pr_url_by_branch(repo=GITHUB_REPOSITORY, branch=branch) - else: - url = "dry-run" - - print(f"ChangeLog PR url [{url}]") - self.changelog_pr = url - print(f"Release url [{url}]") - self.release_url = ( - f"https://github.com/{GITHUB_REPOSITORY}/releases/tag/{self.release_tag}" - ) - self.docker_command = f"docker run --rm clickhouse/clickhouse:{self.release_branch} clickhouse --version" + if self.release_branch != "master": + branch = f"auto/{release_info.release_tag}" + if not dry_run: + url = GHActions.get_pr_url_by_branch( + repo=GITHUB_REPOSITORY, branch=branch + ) + else: + url = "dry-run" + print(f"ChangeLog PR url [{url}]") + self.changelog_pr = url + print(f"Release url [{url}]") + self.release_url = f"https://github.com/{GITHUB_REPOSITORY}/releases/tag/{self.release_tag}" + if self.release_progress == ReleaseProgress.COMPLETED: + self.docker_command = f"docker run --rm clickhouse/clickhouse:{self.version} clickhouse --version" self.dump() return self @@ -712,13 +719,22 @@ if __name__ == "__main__": if args.post_status: release_info = ReleaseInfo.from_file() release_info.update_release_info(dry_run=args.dry_run) - if release_info.debian_command: + if release_info.is_new_release_branch(): + title = "New release branch" + else: + title = "New release" + if ( + release_info.progress_description == ReleaseProgressDescription.OK + and release_info.release_progress == ReleaseProgress.COMPLETED + ): + title = "Completed: " + title CIBuddy(dry_run=args.dry_run).post_done( - f"New release issued", dataclasses.asdict(release_info) + title, dataclasses.asdict(release_info) ) else: + title = "Failed: " + title CIBuddy(dry_run=args.dry_run).post_critical( - f"Failed to issue new release", dataclasses.asdict(release_info) + title, dataclasses.asdict(release_info) ) if args.set_progress_started: diff --git a/tests/ci/worker/init_runner.sh b/tests/ci/worker/init_runner.sh index d6cdb6d9c57..5177e112edd 100644 --- a/tests/ci/worker/init_runner.sh +++ b/tests/ci/worker/init_runner.sh @@ -50,7 +50,7 @@ set -uo pipefail # set accordingly to a runner role # #################################### -echo "Running init v1" +echo "Running init v1.1" export DEBIAN_FRONTEND=noninteractive export RUNNER_HOME=/home/ubuntu/actions-runner @@ -66,6 +66,14 @@ bash /usr/local/share/scripts/init-network.sh RUNNER_TYPE=$(/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='github:runner-type'].Value" --output text) LABELS="self-hosted,Linux,$(uname -m),$RUNNER_TYPE" export LABELS +echo "Instance Labels: $LABELS" + +LIFE_CYCLE=$(curl -s --fail http://169.254.169.254/latest/meta-data/instance-life-cycle) +export LIFE_CYCLE +echo "Instance 
lifecycle: $LIFE_CYCLE" + +INSTANCE_TYPE=$(ec2metadata --instance-type) +echo "Instance type: $INSTANCE_TYPE" # Refresh CloudWatch agent config aws ssm get-parameter --region us-east-1 --name AmazonCloudWatch-github-runners --query 'Parameter.Value' --output text > /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json @@ -124,10 +132,6 @@ terminate_decrease_and_exit() { declare -f terminate_and_exit >> /tmp/actions-hooks/common.sh check_spot_instance_is_old() { - # This function should be executed ONLY BETWEEN runnings. - # It's unsafe to execute while the runner is working! - local LIFE_CYCLE - LIFE_CYCLE=$(curl -s --fail http://169.254.169.254/latest/meta-data/instance-life-cycle) if [ "$LIFE_CYCLE" == "spot" ]; then local UPTIME UPTIME=$(< /proc/uptime) diff --git a/tests/integration/test_ssl_cert_authentication/test.py b/tests/integration/test_ssl_cert_authentication/test.py index 756a1e1996c..3af88759e82 100644 --- a/tests/integration/test_ssl_cert_authentication/test.py +++ b/tests/integration/test_ssl_cert_authentication/test.py @@ -43,15 +43,10 @@ def started_cluster(): config = """ - none - + strict {certificateFile} {privateKeyFile} {caConfig} - - - AcceptCertificateHandler - """ diff --git a/tests/integration/test_table_db_num_limit/test.py b/tests/integration/test_table_db_num_limit/test.py index aa8030b077c..b7f9d7c0b96 100644 --- a/tests/integration/test_table_db_num_limit/test.py +++ b/tests/integration/test_table_db_num_limit/test.py @@ -4,9 +4,7 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance( - "node1", main_configs=["config/config.xml"], with_zookeeper=True -) +node = cluster.add_instance("node", main_configs=["config/config.xml"]) @pytest.fixture(scope="module") @@ -21,23 +19,28 @@ def started_cluster(): def test_table_db_limit(started_cluster): - for i in range(10): - node1.query("create database db{}".format(i)) + # By the way, default database already exists. + for i in range(9): + node.query("create database db{}".format(i)) with pytest.raises(QueryRuntimeException) as exp_info: - node1.query("create database db_exp".format(i)) + node.query("create database db_exp".format(i)) assert "TOO_MANY_DATABASES" in str(exp_info) for i in range(10): - node1.query("create table t{} (a Int32) Engine = Log".format(i)) + node.query("create table t{} (a Int32) Engine = Log".format(i)) + + # This checks that system tables are not accounted in the number of tables. 
+ node.query("system flush logs") - node1.query("system flush logs") for i in range(10): - node1.query("drop table t{}".format(i)) + node.query("drop table t{}".format(i)) + for i in range(10): - node1.query("create table t{} (a Int32) Engine = Log".format(i)) + node.query("create table t{} (a Int32) Engine = Log".format(i)) with pytest.raises(QueryRuntimeException) as exp_info: - node1.query("create table default.tx (a Int32) Engine = Log") + node.query("create table default.tx (a Int32) Engine = Log") + assert "TOO_MANY_TABLES" in str(exp_info) diff --git a/tests/queries/0_stateless/00927_asof_join_correct_bt.reference b/tests/queries/0_stateless/00927_asof_join_correct_bt.reference index bb199d0159a..28c48d2e290 100644 --- a/tests/queries/0_stateless/00927_asof_join_correct_bt.reference +++ b/tests/queries/0_stateless/00927_asof_join_correct_bt.reference @@ -1,13 +1,36 @@ +-- { echoOn } +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t); 1 101 1 0 0 0 1 102 2 2 102 1 1 103 3 2 102 1 1 104 4 4 104 1 1 105 5 4 104 1 +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t); 1 101 1 0 0 0 1 102 2 2 102 1 1 103 3 2 102 1 1 104 4 4 104 1 1 105 5 4 104 1 +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t); +1 101 1 0 0 0 +1 102 2 2 102 1 +1 103 3 2 102 1 +1 104 4 4 104 1 +1 105 5 4 104 1 +SET join_algorithm = 'full_sorting_merge'; +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t); +1 101 1 0 0 0 +1 102 2 2 102 1 +1 103 3 2 102 1 +1 104 4 4 104 1 +1 105 5 4 104 1 +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t); +1 101 1 0 0 0 +1 102 2 2 102 1 +1 103 3 2 102 1 +1 104 4 4 104 1 +1 105 5 4 104 1 +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t); 1 101 1 0 0 0 1 102 2 2 102 1 1 103 3 2 102 1 diff --git a/tests/queries/0_stateless/00927_asof_join_correct_bt.sql b/tests/queries/0_stateless/00927_asof_join_correct_bt.sql index 281a81d51c0..d796b62d3b3 100644 --- a/tests/queries/0_stateless/00927_asof_join_correct_bt.sql +++ b/tests/queries/0_stateless/00927_asof_join_correct_bt.sql @@ -4,20 +4,29 @@ DROP TABLE IF EXISTS B; CREATE TABLE A(k UInt32, t UInt32, a UInt64) ENGINE = MergeTree() ORDER BY (k, t); INSERT INTO A(k,t,a) VALUES (1,101,1),(1,102,2),(1,103,3),(1,104,4),(1,105,5); -CREATE TABLE B(k UInt32, t UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t); -INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4); -SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t); -DROP TABLE B; +CREATE TABLE B1(k UInt32, t UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO B1(k,t,b) VALUES (1,102,2), (1,104,4); +CREATE TABLE B2(t UInt32, k UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO B2(k,t,b) VALUES (1,102,2), (1,104,4); -CREATE TABLE B(t UInt32, k UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t); -INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4); -SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t); -DROP TABLE B; +CREATE TABLE B3(k UInt32, b UInt64, t UInt32) ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO B3(k,t,b) VALUES (1,102,2), (1,104,4); -CREATE TABLE B(k UInt32, b UInt64, t UInt32) ENGINE = MergeTree() ORDER BY (k, t); -INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4); -SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF 
LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
-DROP TABLE B;
+-- { echoOn }
+SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t);
+SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t);
+SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t);
+
+SET join_algorithm = 'full_sorting_merge';
+SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t);
+SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t);
+SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t);
+
+-- { echoOff }
+
+DROP TABLE B1;
+DROP TABLE B2;
+DROP TABLE B3;
 DROP TABLE A;
diff --git a/tests/queries/0_stateless/00927_asof_join_long.reference b/tests/queries/0_stateless/00927_asof_join_long.reference
index d4f015c68e4..ec40d2bc463 100644
--- a/tests/queries/0_stateless/00927_asof_join_long.reference
+++ b/tests/queries/0_stateless/00927_asof_join_long.reference
@@ -1 +1,2 @@
 3000000
+3000000
diff --git a/tests/queries/0_stateless/00927_asof_join_long.sql b/tests/queries/0_stateless/00927_asof_join_long.sql
index c03a06d48d4..7a73875e93e 100644
--- a/tests/queries/0_stateless/00927_asof_join_long.sql
+++ b/tests/queries/0_stateless/00927_asof_join_long.sql
@@ -2,15 +2,28 @@ DROP TABLE IF EXISTS tvs;
 
+-- to be able to use different join algorithms in the subqueries below
+SET allow_experimental_analyzer = 1;
+
 CREATE TABLE tvs(k UInt32, t UInt32, tv UInt64) ENGINE = Memory;
 INSERT INTO tvs(k,t,tv) SELECT k, t, t
 FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys
-CROSS JOIN (SELECT toUInt32(number * 3) as t FROM numbers(10000)) tv_times;
+CROSS JOIN (SELECT toUInt32(number * 3) as t FROM numbers(10000)) tv_times
+SETTINGS join_algorithm = 'hash';
 
 SELECT SUM(trades.price - tvs.tv) FROM
 (SELECT k, t, t as price
  FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys
- CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times) trades
+ CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times
+ SETTINGS join_algorithm = 'hash') trades
 ASOF LEFT JOIN tvs USING(k,t);
 
+SELECT SUM(trades.price - tvs.tv) FROM
+(SELECT k, t, t as price
+ FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys
+ CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times
+ SETTINGS join_algorithm = 'hash') trades
+ASOF LEFT JOIN tvs USING(k,t)
+SETTINGS join_algorithm = 'full_sorting_merge';
+
 DROP TABLE tvs;
diff --git a/tests/queries/0_stateless/00927_asof_join_noninclusive.reference b/tests/queries/0_stateless/00927_asof_join_noninclusive.reference
index fe2844a2a43..d856372fb4a 100644
--- a/tests/queries/0_stateless/00927_asof_join_noninclusive.reference
+++ b/tests/queries/0_stateless/00927_asof_join_noninclusive.reference
@@ -27,3 +27,32 @@
 2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2
 2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2
 2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2
+1 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0
+1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1
+1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1
+1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1
+1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1
+2 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0
+2 1970-01-01 00:00:02 2 0 1970-01-01 00:00:00 0
+2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2
+2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2
+2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2
+3 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0
+3 1970-01-01 00:00:02 2 0 1970-01-01 00:00:00 0
+3 1970-01-01 00:00:03 3 0 1970-01-01 00:00:00 0
+3 1970-01-01 00:00:04 4 0 1970-01-01 00:00:00 0
+3 1970-01-01 00:00:05 5 0 1970-01-01 00:00:00 0
+1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1
+1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1
+1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1
+1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1
+2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2
+2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2
+2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2
+1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1
+1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1
+1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1
+1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1
+2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2
+2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2
+2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2
diff --git a/tests/queries/0_stateless/00927_asof_join_noninclusive.sql b/tests/queries/0_stateless/00927_asof_join_noninclusive.sql
index 5f15f3b593d..3cc99df4462 100644
--- a/tests/queries/0_stateless/00927_asof_join_noninclusive.sql
+++ b/tests/queries/0_stateless/00927_asof_join_noninclusive.sql
@@ -11,9 +11,12 @@ INSERT INTO B(k,t,b) VALUES (1,2,2),(1,4,4);
 INSERT INTO B(k,t,b) VALUES (2,3,3);
 
 SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
-
 SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF INNER JOIN B ON A.k == B.k AND A.t >= B.t ORDER BY (A.k, A.t);
+SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF JOIN B USING(k,t) ORDER BY (A.k, A.t);
+SET join_algorithm = 'full_sorting_merge';
+SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
+SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF INNER JOIN B ON A.k == B.k AND A.t >= B.t ORDER BY (A.k, A.t);
 SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF JOIN B USING(k,t) ORDER BY (A.k, A.t);
 
 DROP TABLE A;
diff --git a/tests/queries/0_stateless/00927_asof_join_other_types.reference b/tests/queries/0_stateless/00927_asof_join_other_types.reference
index 80c85ec1ae3..ddbc24ff925 100644
--- a/tests/queries/0_stateless/00927_asof_join_other_types.reference
+++ b/tests/queries/0_stateless/00927_asof_join_other_types.reference
@@ -1,27 +1,72 @@
+-
 2 1 1 0
 2 3 3 3
 2 5 5 3
+-
 2 1 1 0
 2 3 3 3
 2 5 5 3
+-
 2 1 1 0
 2 3 3 3
 2 5 5 3
+-
 2 1 1 0
 2 3 3 3
 2 5 5 3
+-
+2 1 1 0
+2 3 3 3
+2 5 5 3
+-
+2 1 1 0
+2 3 3 3
+2 5 5 3
+-
+2 1 1 0
+2 3 3 3
+2 5 5 3
+-
+2 1 1 0
+2 3 3 3
+2 5 5 3
+-
 2 1970-01-01 02:00:01 1 0
 2 1970-01-01 02:00:03 3 3
 2 1970-01-01 02:00:05 5 3
+-
+2 1970-01-01 02:00:01 1 0
+2 1970-01-01 02:00:03 3 3
+2 1970-01-01 02:00:05 5 3
+-
 2 1 1 0
 2 3 3 3
 2 5 5 3
+-
 2 1 1 0
 2 3 3 3
 2 5 5 3
+-
 2 1 1 0
 2 3 3 3
 2 5 5 3
+-
+2 1 1 0
+2 3 3 3
+2 5 5 3
+-
+2 1 1 0
+2 3 3 3
+2 5 5 3
+-
+2 1 1 0
+2 3 3 3
+2 5 5 3
+-
+2 1970-01-01 02:00:00.001 1 0
+2 1970-01-01 02:00:00.003 3 3
+2 1970-01-01 02:00:00.005 5 3
+-
 2 1970-01-01 02:00:00.001 1 0
 2 1970-01-01 02:00:00.003 3 3
 2 1970-01-01 02:00:00.005 5 3
diff --git a/tests/queries/0_stateless/00927_asof_join_other_types.sh b/tests/queries/0_stateless/00927_asof_join_other_types.sh
deleted file mode 100755
index 10173a3e43f..00000000000
--- a/tests/queries/0_stateless/00927_asof_join_other_types.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-
-CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-# shellcheck source=../shell_config.sh
-. "$CURDIR"/../shell_config.sh
-
-for typename in "UInt32" "UInt64" "Float64" "Float32" "DateTime('Asia/Istanbul')" "Decimal32(5)" "Decimal64(5)" "Decimal128(5)" "DateTime64(3, 'Asia/Istanbul')"
-do
-    $CLICKHOUSE_CLIENT -mn <= B.t ORDER BY (A.a, A.t);
 SELECT count() FROM A ASOF LEFT JOIN B ON A.a == B.b AND B.t <= A.t;
 SELECT A.a, A.t, B.b, B.t FROM A ASOF INNER JOIN B ON B.t <= A.t AND A.a == B.b ORDER BY (A.a, A.t);
@@ -28,5 +36,8 @@ ASOF INNER JOIN (SELECT * FROM B UNION ALL SELECT 1, 3) AS B ON B.t <= A.t AND A
 WHERE B.t != 3
 ORDER BY (A.a, A.t)
 ;
+{% endfor -%}
+{% endfor -%}
+
 DROP TABLE A;
 DROP TABLE B;
diff --git a/tests/queries/0_stateless/01116_asof_join_dolbyzerr.reference b/tests/queries/0_stateless/01116_asof_join_dolbyzerr.reference
index 1055a67ea5b..0aa1a85f19d 100644
--- a/tests/queries/0_stateless/01116_asof_join_dolbyzerr.reference
+++ b/tests/queries/0_stateless/01116_asof_join_dolbyzerr.reference
@@ -1,3 +1,6 @@
 v1 o1 ['s2','s1']
 v1 o2 ['s4']
 v2 o3 ['s5','s3']
+v1 o1 ['s2','s1']
+v1 o2 ['s4']
+v2 o3 ['s5','s3']
diff --git a/tests/queries/0_stateless/01116_asof_join_dolbyzerr.sql b/tests/queries/0_stateless/01116_asof_join_dolbyzerr.sql
index 8a94b6ddd24..652cb35cf2a 100644
--- a/tests/queries/0_stateless/01116_asof_join_dolbyzerr.sql
+++ b/tests/queries/0_stateless/01116_asof_join_dolbyzerr.sql
@@ -16,3 +16,17 @@ GROUP BY
 ORDER BY
     visitorId ASC,
     orderId ASC;
+
+SELECT
+    visitorId,
+    orderId,
+    groupUniqArray(sessionId)
+FROM sessions
+ASOF INNER JOIN orders ON (sessions.visitorId = orders.visitorId) AND (sessions.date <= orders.date)
+GROUP BY
+    visitorId,
+    orderId
+ORDER BY
+    visitorId ASC,
+    orderId ASC
+SETTINGS join_algorithm = 'full_sorting_merge';
diff --git a/tests/queries/0_stateless/01194_http_query_id.sh b/tests/queries/0_stateless/01194_http_query_id.sh
index 5aebdc10dfc..42321112185 100755
--- a/tests/queries/0_stateless/01194_http_query_id.sh
+++ b/tests/queries/0_stateless/01194_http_query_id.sh
@@ -4,14 +4,22 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
-url="http://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/?session_id=test_01194"
-rnd=$RANDOM
+rnd="$CLICKHOUSE_DATABASE"
+url="${CLICKHOUSE_URL}&session_id=test_01194_${CLICKHOUSE_DATABASE}"
 
 ${CLICKHOUSE_CURL} -sS "$url&query=SELECT+'test_01194',$rnd,1" > /dev/null
 ${CLICKHOUSE_CURL} -sS "$url&query=SELECT+'test_01194',$rnd,2" > /dev/null
 ${CLICKHOUSE_CURL} -sS "$url" --data "SELECT 'test_01194',$rnd,3" > /dev/null
 ${CLICKHOUSE_CURL} -sS "$url" --data "SELECT 'test_01194',$rnd,4" > /dev/null
-${CLICKHOUSE_CURL} -sS "$url" --data "SYSTEM FLUSH LOGS"
+$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
 
-${CLICKHOUSE_CURL} -sS "$url&query=SELECT+count(DISTINCT+query_id)+FROM+system.query_log+WHERE+current_database+LIKE+currentDatabase()+AND+query+LIKE+'SELECT+''test_01194'',$rnd%25'"
+$CLICKHOUSE_CLIENT -q "
+    SELECT
+        count(DISTINCT query_id)
+    FROM system.query_log
+    WHERE
+        current_database = currentDatabase()
+        AND event_date >= yesterday()
+        AND query LIKE 'SELECT ''test_01194'',$rnd%'
+        AND query_id != queryID()"
diff --git a/tests/queries/0_stateless/01338_long_select_and_alter.reference b/tests/queries/0_stateless/01338_long_select_and_alter.reference
index c2678e7052e..027109252e1 100644
--- a/tests/queries/0_stateless/01338_long_select_and_alter.reference
+++ b/tests/queries/0_stateless/01338_long_select_and_alter.reference
@@ -1,3 +1,3 @@
-10
 5
-CREATE TABLE default.alter_mt\n(\n    `key` UInt64,\n    `value` UInt64\n)\nENGINE = MergeTree\nORDER BY key\nSETTINGS index_granularity = 8192
+5
+CREATE TABLE default.alter_mt\n(\n    `key` Int64,\n    `value` Int64\n)\nENGINE = MergeTree\nORDER BY key\nSETTINGS index_granularity = 8192
diff --git a/tests/queries/0_stateless/01338_long_select_and_alter.sh b/tests/queries/0_stateless/01338_long_select_and_alter.sh
index 2b0709162a3..5d2759ac884 100755
--- a/tests/queries/0_stateless/01338_long_select_and_alter.sh
+++ b/tests/queries/0_stateless/01338_long_select_and_alter.sh
@@ -7,16 +7,16 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
 $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS alter_mt"
 
-$CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key UInt64, value String) ENGINE=MergeTree() ORDER BY key"
+$CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key Int64, value String) ENGINE=MergeTree() ORDER BY key"
 
-$CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number, toString(number) FROM numbers(5)"
+$CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number - 1 AS x, toString(x) FROM numbers(5)"
 
 $CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10000000 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" &
 
-# to be sure that select took all required locks
+# To be sure that the SELECT has taken all required locks; this improves the test's sensitivity, but it is not guaranteed (the test will also succeed without it).
 sleep 2
 
-$CLICKHOUSE_CLIENT --query "ALTER TABLE alter_mt MODIFY COLUMN value UInt64"
+$CLICKHOUSE_CLIENT --query "ALTER TABLE alter_mt MODIFY COLUMN value Int64"
 
 $CLICKHOUSE_CLIENT --query "SELECT sum(value) FROM alter_mt"
diff --git a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference
index b4ed8efab63..65e638bc3a4 100644
--- a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference
+++ b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference
@@ -1,3 +1,3 @@
-10
 5
-CREATE TABLE default.alter_mt\n(\n    `key` UInt64,\n    `value` UInt64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/01338_long_select_and_alter_zookeeper_default/alter_mt\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192
+5
+CREATE TABLE default.alter_mt\n(\n    `key` Int64,\n    `value` Int64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/01338_long_select_and_alter_zookeeper_default/alter_mt\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192
diff --git a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh
index 41e0a12f369..593a96a7cc8 100755
--- a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh
+++ b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh
@@ -7,16 +7,16 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
 $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS alter_mt"
 
-$CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key UInt64, value String) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_mt', '1') ORDER BY key"
+$CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key Int64, value String) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_mt', '1') ORDER BY key"
 
-$CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number, toString(number) FROM numbers(5)"
+$CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number - 1 AS x, toString(x) FROM numbers(5)"
 
 $CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10000000 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" &
 
-# to be sure that select took all required locks
+# To be sure that the SELECT has taken all required locks; this improves the test's sensitivity, but it is not guaranteed (the test will also succeed without it).
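+# (Same illustrative timing as in the non-replicated variant: 5 rows * sleepEachRow(2) ~= 10 seconds of held locks.)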
 sleep 2
 
-$CLICKHOUSE_CLIENT --query "ALTER TABLE alter_mt MODIFY COLUMN value UInt64"
+$CLICKHOUSE_CLIENT --query "ALTER TABLE alter_mt MODIFY COLUMN value Int64"
 
 $CLICKHOUSE_CLIENT --query "SELECT sum(value) FROM alter_mt"
diff --git a/tests/queries/0_stateless/01592_long_window_functions1.sql b/tests/queries/0_stateless/01592_long_window_functions1.sql
index d2d32e24eaa..671245599cc 100644
--- a/tests/queries/0_stateless/01592_long_window_functions1.sql
+++ b/tests/queries/0_stateless/01592_long_window_functions1.sql
@@ -8,14 +8,14 @@ drop table if exists stack;
 set max_insert_threads = 4;
 
 create table stack(item_id Int64, brand_id Int64, rack_id Int64, dt DateTime, expiration_dt DateTime, quantity UInt64)
-Engine = MergeTree 
-partition by toYYYYMM(dt) 
+Engine = MergeTree
+partition by toYYYYMM(dt)
 order by (brand_id, toStartOfHour(dt))
 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
 
-insert into stack 
-select number%99991, number%11, number%1111, toDateTime('2020-01-01 00:00:00')+number/100, 
+insert into stack
+select number%99991, number%11, number%1111, toDateTime('2020-01-01 00:00:00')+number/100,
 toDateTime('2020-02-01 00:00:00')+number/10, intDiv(number,100)+1
-from numbers_mt(10000000);
+from numbers_mt(1000000);
 
 select '---- arrays ----';
 
@@ -32,8 +32,8 @@ select '---- window f ----';
 select cityHash64( toString( groupArray (tuple(*) ) )) from (
    select brand_id, rack_id, quantity from
       ( select brand_id, rack_id, quantity, row_number() over (partition by brand_id, rack_id order by quantity) rn
-      from stack ) as t0 
-      where rn <= 2 
+      from stack ) as t0
+      where rn <= 2
    order by brand_id, rack_id, quantity
 ) t;
diff --git a/tests/queries/0_stateless/02240_asof_join_biginteger.reference b/tests/queries/0_stateless/02240_asof_join_biginteger.reference
index cac55eec430..f7eb4d74375 100644
--- a/tests/queries/0_stateless/02240_asof_join_biginteger.reference
+++ b/tests/queries/0_stateless/02240_asof_join_biginteger.reference
@@ -2,3 +2,7 @@
 0 340282366920938463463374607431768211457
 0 18446744073709551617
 0 340282366920938463463374607431768211457
+0 18446744073709551617
+0 340282366920938463463374607431768211457
+0 18446744073709551617
+0 340282366920938463463374607431768211457
diff --git a/tests/queries/0_stateless/02240_asof_join_biginteger.sql b/tests/queries/0_stateless/02240_asof_join_biginteger.sql
index 6dc5b00f116..a5c1faae4ea 100644
--- a/tests/queries/0_stateless/02240_asof_join_biginteger.sql
+++ b/tests/queries/0_stateless/02240_asof_join_biginteger.sql
@@ -3,3 +3,11 @@ select * from (select 0 as k, toInt256('340282366920938463463374607431768211457'
 
 select * from (select 0 as k, toUInt128('18446744073709551617') as v) t1 asof join (select 0 as k, toUInt128('18446744073709551616') as v) t2 using(k, v);
 select * from (select 0 as k, toUInt256('340282366920938463463374607431768211457') as v) t1 asof join (select 0 as k, toUInt256('340282366920938463463374607431768211456') as v) t2 using(k, v);
+
+SET join_algorithm = 'full_sorting_merge';
+
+select * from (select 0 as k, toInt128('18446744073709551617') as v) t1 asof join (select 0 as k, toInt128('18446744073709551616') as v) t2 using(k, v);
+select * from (select 0 as k, toInt256('340282366920938463463374607431768211457') as v) t1 asof join (select 0 as k, toInt256('340282366920938463463374607431768211456') as v) t2 using(k, v);
+
+select * from (select 0 as k, toUInt128('18446744073709551617') as v) t1 asof join (select 0 as k, toUInt128('18446744073709551616') as v) t2 using(k, v);
+select * from (select 0 as k, toUInt256('340282366920938463463374607431768211457') as v) t1 asof join (select 0 as k, toUInt256('340282366920938463463374607431768211456') as v) t2 using(k, v);
diff --git a/tests/queries/0_stateless/02241_join_rocksdb_bs.reference b/tests/queries/0_stateless/02241_join_rocksdb_bs.reference
index 8416a2991c1..4dff9ef38ef 100644
--- a/tests/queries/0_stateless/02241_join_rocksdb_bs.reference
+++ b/tests/queries/0_stateless/02241_join_rocksdb_bs.reference
@@ -10,59 +10,3 @@
 1
 1
 1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
diff --git a/tests/queries/0_stateless/02241_join_rocksdb_bs.sql.j2 b/tests/queries/0_stateless/02241_join_rocksdb_bs.sql.j2
index 6121db6d6a2..e5703f99d62 100644
--- a/tests/queries/0_stateless/02241_join_rocksdb_bs.sql.j2
+++ b/tests/queries/0_stateless/02241_join_rocksdb_bs.sql.j2
@@ -1,4 +1,4 @@
--- Tags: use-rocksdb, long
+-- Tags: use-rocksdb, long, no-s3-storage
 
 SET join_algorithm = 'direct';
 
@@ -13,27 +13,21 @@ INSERT INTO rdb_{{ table_size }}
     SELECT (sipHash64(number) % {{ table_size }}) as key, ('val' || toString(key)) AS value
     FROM numbers_mt({{ table_size }});
 
-{% for block_size in [10, 11, 128, 129, 65505, 65506, 70000] -%}
-
-{% if block_size * 5000 > table_size -%}
-
-SET max_block_size = {{ block_size }};
-
 {% for right_size in [table_size // 2, table_size + table_size // 4 + 1] -%}
 
 SELECT count() == (SELECT count() FROM rdb_{{ table_size }} WHERE key < {{ right_size }})
 FROM (SELECT number as k FROM numbers_mt({{ right_size }})) as t1
 INNER JOIN rdb_{{ table_size }} as rdb
-ON rdb.key == t1.k;
+ON rdb.key == t1.k
+{% if table_size < 100 %}SETTINGS max_block_size = 1{% endif -%}
+;
 
 SELECT count() == {{ right_size }} and countIf(value != '') == (SELECT count() FROM rdb_{{ table_size }} WHERE key < {{ right_size }})
 FROM (SELECT number as k FROM numbers_mt({{ right_size }})) as t1
 LEFT JOIN rdb_{{ table_size }} as rdb
-ON rdb.key == t1.k;
-
-{% endfor -%}
-
-{% endif -%}
+ON rdb.key == t1.k
+{% if table_size < 100 %}SETTINGS max_block_size = 1{% endif -%}
+;
 
 {% endfor -%}
 {% endfor -%}
@@ -41,4 +35,3 @@ ON rdb.key == t1.k;
 {% for table_size in [10, 65555, 100000] -%}
 DROP TABLE IF EXISTS rdb_{{ table_size }};
 {% endfor -%}
-
diff --git a/tests/queries/0_stateless/02276_full_sort_join_unsupported.sql b/tests/queries/0_stateless/02276_full_sort_join_unsupported.sql
index a4e60ff54dd..0b10101d8f2 100644
--- a/tests/queries/0_stateless/02276_full_sort_join_unsupported.sql
+++ b/tests/queries/0_stateless/02276_full_sort_join_unsupported.sql
@@ -19,8 +19,6 @@ SELECT * FROM t1 ANTI JOIN t2 ON t1.key = t2.key; -- { serverError NOT_IMPLEMENT
 
 SELECT * FROM t1 SEMI JOIN t2 ON t1.key = t2.key; -- { serverError NOT_IMPLEMENTED }
 
-SELECT * FROM t1 ASOF JOIN t2 ON t1.key = t2.key AND t1.val > t2.val; -- { serverError NOT_IMPLEMENTED }
-
 SELECT * FROM t1 ANY JOIN t2 ON t1.key = t2.key SETTINGS any_join_distinct_right_table_keys = 1; -- { serverError NOT_IMPLEMENTED }
 
 SELECT * FROM t1 JOIN t2 USING (key) SETTINGS join_use_nulls = 1; -- { serverError NOT_IMPLEMENTED }
diff --git a/tests/queries/0_stateless/02477_age_datetime64.reference b/tests/queries/0_stateless/02477_age_datetime64.reference
index 3b4459dd26d..fb085f461c9 100644
--- a/tests/queries/0_stateless/02477_age_datetime64.reference
+++ b/tests/queries/0_stateless/02477_age_datetime64.reference
@@ -111,3 +111,8 @@ SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), ma
 1
 SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC')));
 1
+-- UBSan bug #66638
+set session_timezone = 'UTC';
+SELECT age('second', toDateTime(1157339245694594829, 6, 'UTC'), toDate('2015-08-18'))
+
+-8973935999
diff --git a/tests/queries/0_stateless/02477_age_datetime64.sql b/tests/queries/0_stateless/02477_age_datetime64.sql
index 1bed93991ca..b5fa4da8837 100644
--- a/tests/queries/0_stateless/02477_age_datetime64.sql
+++ b/tests/queries/0_stateless/02477_age_datetime64.sql
@@ -75,3 +75,7 @@ SELECT age('second', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')),
 SELECT age('second', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDateTime64('2015-08-18 00:00:10', 3, 'UTC')));
 SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), materialize(toDate('2015-08-19', 'UTC')));
 SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC')));
+
+-- UBSan bug #66638
+set session_timezone = 'UTC';
+SELECT age('second', toDateTime(1157339245694594829, 6, 'UTC'), toDate('2015-08-18'))
diff --git a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference
index d00491fd7e5..6ed281c757a 100644
--- a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference
+++ b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference
@@ -1 +1,2 @@
 1
+1
diff --git a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql
index 13dfb5debe7..6aa70a379c1 100644
--- a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql
+++ b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql
@@ -6,3 +6,15 @@ ASOF LEFT JOIN (
     select 1 as session_id, 4 as id
 ) as visitors
 ON visitors.session_id <= sessions.id AND arrayFirst(a -> a, arrayMap((a) -> a, sessions.arr)) = visitors.id
+;
+
+select count(*)
+from (
+    select 1 as id, [1, 2, 3] as arr
+) as sessions
+ASOF LEFT JOIN (
+    select 1 as session_id, 4 as id
+) as visitors
+ON visitors.session_id <= sessions.id AND arrayFirst(a -> a, arrayMap((a) -> a, sessions.arr)) = visitors.id
+SETTINGS join_algorithm = 'full_sorting_merge'
+;
diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference
index 4eb7e74446d..31a1cda18e7 100644
--- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference
+++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference
@@ -35,6 +35,10 @@ SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS N
 2 2 2 2
 3 3 3 33
 \N \N \N \N
+SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST;
+2 2 2 2
+3 3 3 33
+\N \N \N \N
 -- aliases defined in the join condition are valid
 -- FIXME(@vdimir) broken query formatting for the following queries:
 -- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST;
diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql
index f7813e2a1b4..f739259caf9 100644
--- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql
+++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql
@@ -34,6 +34,7 @@ SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) A
 SELECT x = y OR (x IS NULL AND y IS NULL) FROM t1 ORDER BY x NULLS LAST;
 
 SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST;
+SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST;
 
 -- aliases defined in the join condition are valid
 -- FIXME(@vdimir) broken query formatting for the following queries:
diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.reference b/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.reference
deleted file mode 100644
index b19d389d8d0..00000000000
--- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.reference
+++ /dev/null
@@ -1,10 +0,0 @@
-before
-rmt_master NewPart 0 1
-rmt_master MergeParts 0 1
-rmt_slave MergeParts 1 0
-rmt_slave DownloadPart 0 1
-after
-rmt_master NewPart 0 1
-rmt_master MergeParts 0 1
-rmt_slave MergeParts 1 0
-rmt_slave DownloadPart 0 2
diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sql b/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sql
deleted file mode 100644
index 548a8e5570a..00000000000
--- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sql
+++ /dev/null
@@ -1,35 +0,0 @@
--- Tags: no-replicated-database, no-parallel, no-shared-merge-tree
--- SMT: The merge process is completely different from RMT
-
-drop table if exists rmt_master;
-drop table if exists rmt_slave;
-
-create table rmt_master (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'master') order by key settings always_fetch_merged_part=0;
--- always_fetch_merged_part=1, consider this table as a "slave"
-create table rmt_slave (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'slave') order by key settings always_fetch_merged_part=1;
-
-insert into rmt_master values (1);
-
-system sync replica rmt_master;
-system sync replica rmt_slave;
-system stop replicated sends rmt_master;
-optimize table rmt_master final settings alter_sync=1, optimize_throw_if_noop=1;
-
-select sleep(3) format Null;
-
-system flush logs;
-select 'before';
-select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3;
-
-system start replicated sends rmt_master;
--- sleep few seconds to try rmt_slave to fetch the part and reflect this error
--- in system.part_log
-select sleep(3) format Null;
-system sync replica rmt_slave;
-
-system flush logs;
-select 'after';
-select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3;
-
-drop table rmt_master;
-drop table rmt_slave;
diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.reference b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.reference
deleted file mode 100644
index aac9e7527d1..00000000000
--- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.reference
+++ /dev/null
@@ -1,10 +0,0 @@
-before
-rmt_master NewPart 0 1
-rmt_master MutatePart 0 1
-rmt_slave DownloadPart 0 1
-rmt_slave MutatePart 1 0
-after
-rmt_master NewPart 0 1
-rmt_master MutatePart 0 1
-rmt_slave DownloadPart 0 2
-rmt_slave MutatePart 1 0
diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql
deleted file mode 100644
index d8b5ebb3148..00000000000
--- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql
+++ /dev/null
@@ -1,41 +0,0 @@
--- Tags: no-replicated-database, no-parallel, no-shared-merge-tree
--- SMT: The merge process is completely different from RMT
-
-drop table if exists rmt_master;
-drop table if exists rmt_slave;
-
-create table rmt_master (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'master') order by tuple() settings always_fetch_merged_part=0, old_parts_lifetime=600;
--- prefer_fetch_merged_part_*_threshold=0, consider this table as a "slave"
-create table rmt_slave (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'slave') order by tuple() settings prefer_fetch_merged_part_time_threshold=0, prefer_fetch_merged_part_size_threshold=0, old_parts_lifetime=600;
-
-insert into rmt_master values (1);
-
-system sync replica rmt_master;
-system sync replica rmt_slave;
-system stop replicated sends rmt_master;
-system stop pulling replication log rmt_slave;
-alter table rmt_master update key=key+100 where 1 settings alter_sync=1;
-
--- first we need to make the rmt_master execute mutation so that it will have
--- the part, and rmt_slave will consider it instead of performing mutation on
--- it's own, otherwise prefer_fetch_merged_part_*_threshold will be simply ignored
-select sleep(3) format Null;
-system start pulling replication log rmt_slave;
--- and sleep few more seconds to try rmt_slave to fetch the part and reflect
--- this error in system.part_log
-select sleep(3) format Null;
-
-system flush logs;
-select 'before';
-select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3;
-
-system start replicated sends rmt_master;
-select sleep(3) format Null;
-system sync replica rmt_slave;
-
-system flush logs;
-select 'after';
-select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3;
-
-drop table rmt_master;
-drop table rmt_slave;
diff --git a/tests/queries/0_stateless/03143_asof_join_ddb_long.reference b/tests/queries/0_stateless/03143_asof_join_ddb_long.reference
new file mode 100644
index 00000000000..2850a8aba98
--- /dev/null
+++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.reference
@@ -0,0 +1,2 @@
+49999983751397 10000032
+49999983751397 10000032
diff --git a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql
new file mode 100644
index 00000000000..17a67511030
--- /dev/null
+++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql
@@ -0,0 +1,50 @@
+-- Tags: long
+
+DROP TABLE IF EXISTS build;
+DROP TABLE IF EXISTS skewed_probe;
+
+SET session_timezone = 'UTC';
+
+CREATE TABLE build ENGINE = MergeTree ORDER BY (key, begin)
+AS
+    SELECT
+        toDateTime('1990-03-21 13:00:00') + INTERVAL number MINUTE AS begin,
+        number % 4 AS key,
+        number AS value
+    FROM numbers(0, 10000000);
+
+CREATE TABLE skewed_probe ENGINE = MergeTree ORDER BY (key, begin)
+AS
+    SELECT
+        toDateTime('1990-04-21 13:00:01') + INTERVAL number MINUTE AS begin,
+        0 AS key
+    FROM numbers(0, 5)
+    UNION ALL
+    SELECT
+        toDateTime('1990-05-21 13:00:01') + INTERVAL number MINUTE AS begin,
+        1 AS key
+    FROM numbers(0, 10)
+    UNION ALL
+    SELECT
+        toDateTime('1990-06-21 13:00:01') + INTERVAL number MINUTE AS begin,
+        2 AS key
+    FROM numbers(0, 20)
+    UNION ALL
+    SELECT
+        toDateTime('1990-03-21 13:00:01') + INTERVAL number MINUTE AS begin,
+        3 AS key
+    FROM numbers(0, 10000000);
+
+
+SELECT SUM(value), COUNT(*)
+FROM skewed_probe
+ASOF JOIN build
+USING (key, begin)
+;
+
+SELECT SUM(value), COUNT(*)
+FROM skewed_probe
+ASOF JOIN build
+USING (key, begin)
+SETTINGS join_algorithm = 'full_sorting_merge'
+;
diff --git a/tests/queries/0_stateless/03144_asof_join_ddb_doubles.reference b/tests/queries/0_stateless/03144_asof_join_ddb_doubles.reference
new file mode 100644
index 00000000000..f130f0a3f3b
--- /dev/null
+++ b/tests/queries/0_stateless/03144_asof_join_ddb_doubles.reference
@@ -0,0 +1,58 @@
+1 0
+2 0
+3 1
+4 1
+5 1
+6 2
+7 2
+8 3
+9 3
+0 0
+1 0
+2 0
+3 1
+4 1
+5 1
+6 2
+7 2
+8 3
+9 3
+1 1 0
+1 2 0
+1 3 1
+1 4 1
+1 5 1
+1 6 2
+1 7 2
+1 8 3
+1 9 3
+2 0 10
+2 1 10
+2 2 10
+2 3 10
+2 4 10
+2 5 10
+2 6 10
+2 7 20
+2 8 20
+2 9 20
+1 0 0
+1 1 0
+1 2 0
+1 3 1
+1 4 1
+1 5 1
+1 6 2
+1 7 2
+1 8 3
+1 9 3
+2 0 10
+2 1 10
+2 2 10
+2 3 10
+2 4 10
+2 5 10
+2 6 10
+2 7 20
+2 8 20
+2 9 20
diff --git a/tests/queries/0_stateless/03144_asof_join_ddb_doubles.sql b/tests/queries/0_stateless/03144_asof_join_ddb_doubles.sql
new file mode 100644
index 00000000000..87aece14628
--- /dev/null
+++ b/tests/queries/0_stateless/03144_asof_join_ddb_doubles.sql
@@ -0,0 +1,65 @@
+SET join_algorithm = 'full_sorting_merge';
+SET allow_experimental_analyzer = 1;
+
+DROP TABLE IF EXISTS events0;
+
+CREATE TABLE events0 (
+    begin Float64,
+    value Int32
+) ENGINE = MergeTree ORDER BY begin;
+
+INSERT INTO events0 VALUES (1.0, 0), (3.0, 1), (6.0, 2), (8.0, 3);
+
+SELECT p.ts, e.value
+FROM
+    (SELECT number :: Float64 AS ts FROM numbers(10)) p
+ASOF JOIN events0 e
+ON p.ts >= e.begin
+ORDER BY p.ts ASC;
+
+SELECT p.ts, e.value
+FROM
+    (SELECT number :: Float64 AS ts FROM numbers(10)) p
+ASOF LEFT JOIN events0 e
+ON p.ts >= e.begin
+ORDER BY p.ts ASC
+-- SETTINGS join_use_nulls = 1
+;
+
+DROP TABLE IF EXISTS events0;
+
+DROP TABLE IF EXISTS events;
+DROP TABLE IF EXISTS probes;
+
+CREATE TABLE events (
+    key Int32,
+    begin Float64,
+    value Int32
+) ENGINE = MergeTree ORDER BY (key, begin);
+
+INSERT INTO events VALUES (1, 1.0, 0), (1, 3.0, 1), (1, 6.0, 2), (1, 8.0, 3), (2, 0.0, 10), (2, 7.0, 20), (2, 11.0, 30);
+
+CREATE TABLE probes (
+    key Int32,
+    ts Float64
+) ENGINE = MergeTree ORDER BY (key, ts) AS
+SELECT
+    key.number,
+    ts.number
+FROM
+    numbers(1, 2) as key,
+    numbers(10) as ts
+SETTINGS join_algorithm = 'hash';
+
+SELECT p.key, p.ts, e.value
+FROM probes p
+ASOF JOIN events e
+ON p.key = e.key AND p.ts >= e.begin
+ORDER BY p.key, p.ts ASC;
+
+SELECT p.key, p.ts, e.value
+FROM probes p
+ASOF LEFT JOIN events e
+ON p.key = e.key AND p.ts >= e.begin
+ORDER BY p.key, p.ts ASC NULLS FIRST;
+
diff --git a/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.reference b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.reference
new file mode 100644
index 00000000000..4aac918c98c
--- /dev/null
+++ b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.reference
@@ -0,0 +1,73 @@
+-
+2023-03-21 12:00:00 1970-01-01 00:00:00 -1
+2023-03-21 13:00:00 1970-01-01 00:00:00 -1
+2023-03-21 14:00:00 2023-03-21 13:00:00 0
+2023-03-21 15:00:00 2023-03-21 14:00:00 1
+2023-03-21 16:00:00 2023-03-21 15:00:00 2
+2023-03-21 17:00:00 2023-03-21 16:00:00 3
+2023-03-21 18:00:00 2023-03-21 16:00:00 3
+2023-03-21 19:00:00 2023-03-21 16:00:00 3
+2023-03-21 20:00:00 2023-03-21 16:00:00 3
+2023-03-21 21:00:00 2023-03-21 16:00:00 3
+2027-10-18 11:03:27 2023-03-21 16:00:00 3
+-
+2023-03-21 12:00:00 1970-01-01 00:00:00 -1
+2023-03-21 13:00:00 1970-01-01 00:00:00 -1
+2023-03-21 14:00:00 2023-03-21 13:00:00 0
+2023-03-21 15:00:00 2023-03-21 14:00:00 1
+2023-03-21 16:00:00 2023-03-21 15:00:00 2
+2023-03-21 17:00:00 2023-03-21 16:00:00 3
+2023-03-21 18:00:00 2023-03-21 16:00:00 3
+2023-03-21 19:00:00 2023-03-21 16:00:00 3
+2023-03-21 20:00:00 2023-03-21 16:00:00 3
+2023-03-21 21:00:00 2023-03-21 16:00:00 3
+2027-10-18 11:03:27 2023-03-21 16:00:00 3
+\N \N \N
+2023-03-21 12:00:00 2023-03-21 13:00:00 0
+2023-03-21 13:00:00 2023-03-21 13:00:00 0
+2023-03-21 14:00:00 2023-03-21 14:00:00 1
+2023-03-21 15:00:00 2023-03-21 15:00:00 2
+2023-03-21 16:00:00 2023-03-21 16:00:00 3
+2023-03-21 17:00:00 2027-10-18 11:03:27 9
+2023-03-21 18:00:00 2027-10-18 11:03:27 9
+2023-03-21 19:00:00 2027-10-18 11:03:27 9
+2023-03-21 20:00:00 2027-10-18 11:03:27 9
+2023-03-21 21:00:00 2027-10-18 11:03:27 9
+2027-10-18 11:03:27 2027-10-18 11:03:27 9
+-
+2023-03-21 12:00:00 2023-03-21 13:00:00 0
+2023-03-21 13:00:00 2023-03-21 13:00:00 0
+2023-03-21 14:00:00 2023-03-21 14:00:00 1
+2023-03-21 15:00:00 2023-03-21 15:00:00 2
+2023-03-21 16:00:00 2023-03-21 16:00:00 3
+2023-03-21 17:00:00 2027-10-18 11:03:27 9
+2023-03-21 18:00:00 2027-10-18 11:03:27 9
+2023-03-21 19:00:00 2027-10-18 11:03:27 9
+2023-03-21 20:00:00 2027-10-18 11:03:27 9
+2023-03-21 21:00:00 2027-10-18 11:03:27 9
+2027-10-18 11:03:27 2027-10-18 11:03:27 9
+\N \N \N
+-
+2023-03-21 12:00:00 2023-03-21 13:00:00 0
+2023-03-21 13:00:00 2023-03-21 14:00:00 1
+2023-03-21 14:00:00 2023-03-21 15:00:00 2
+2023-03-21 15:00:00 2023-03-21 16:00:00 3
+2023-03-21 16:00:00 2027-10-18 11:03:27 9
+2023-03-21 17:00:00 2027-10-18 11:03:27 9
+2023-03-21 18:00:00 2027-10-18 11:03:27 9
+2023-03-21 19:00:00 2027-10-18 11:03:27 9
+2023-03-21 20:00:00 2027-10-18 11:03:27 9
+2023-03-21 21:00:00 2027-10-18 11:03:27 9
+-
+2023-03-21 12:00:00 2023-03-21 13:00:00 0
+2023-03-21 13:00:00 2023-03-21 14:00:00 1
+2023-03-21 14:00:00 2023-03-21 15:00:00 2
+2023-03-21 15:00:00 2023-03-21 16:00:00 3
+2023-03-21 16:00:00 2027-10-18 11:03:27 9
+2023-03-21 17:00:00 2027-10-18 11:03:27 9
+2023-03-21 18:00:00 2027-10-18 11:03:27 9
+2023-03-21 19:00:00 2027-10-18 11:03:27 9
+2023-03-21 20:00:00 2027-10-18 11:03:27 9
+2023-03-21 21:00:00 2027-10-18 11:03:27 9
+2027-10-18 11:03:27 \N \N
+\N \N \N
diff --git a/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.sql b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.sql
new file mode 100644
index 00000000000..d67aa254bd6
--- /dev/null
+++ b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.sql
@@ -0,0 +1,66 @@
+DROP TABLE IF EXISTS events0;
+DROP TABLE IF EXISTS probe0;
+
+SET allow_experimental_analyzer = 1;
+SET join_algorithm = 'full_sorting_merge';
+
+CREATE TABLE events0 (
+    begin Nullable(DateTime('UTC')),
+    value Int32
+) ENGINE = MergeTree ORDER BY tuple();
+
+INSERT INTO events0 SELECT toDateTime('2023-03-21 13:00:00', 'UTC') + INTERVAL number HOUR, number FROM numbers(4);
+INSERT INTO events0 VALUES (NULL, -10),('0000-01-01 00:00:00', -1), ('9999-12-31 23:59:59', 9);
+
+CREATE TABLE probe0 (
+    begin Nullable(DateTime('UTC'))
+) ENGINE = MergeTree ORDER BY tuple();
+
+INSERT INTO probe0 SELECT toDateTime('2023-03-21 12:00:00', 'UTC') + INTERVAL number HOUR FROM numbers(10);
+INSERT INTO probe0 VALUES (NULL),('9999-12-31 23:59:59');
+
+SET join_use_nulls = 1;
+
+SELECT '-';
+SELECT p.begin, e.begin, e.value
+FROM probe0 p
+ASOF JOIN events0 e
+ON p.begin > e.begin
+ORDER BY p.begin ASC;
+
+SELECT '-';
+SELECT p.begin, e.begin, e.value
+FROM probe0 p
+ASOF LEFT JOIN events0 e
+ON p.begin > e.begin
+ORDER BY p.begin ASC;
+
+SELECT p.begin, e.begin, e.value
+FROM probe0 p
+ASOF JOIN events0 e
+ON p.begin <= e.begin
+ORDER BY p.begin ASC;
+
+SELECT '-';
+SELECT p.begin, e.begin, e.value
+FROM probe0 p
+ASOF LEFT JOIN events0 e
+ON p.begin <= e.begin
+ORDER BY p.begin ASC;
+
+SELECT '-';
+SELECT p.begin, e.begin, e.value
+FROM probe0 p
+ASOF JOIN events0 e
+ON p.begin < e.begin
+ORDER BY p.begin ASC;
+
+SELECT '-';
+SELECT p.begin, e.begin, e.value
+FROM probe0 p
+ASOF LEFT JOIN events0 e
+ON p.begin < e.begin
+ORDER BY p.begin ASC;
+
+DROP TABLE IF EXISTS events0;
+DROP TABLE IF EXISTS probe0;
diff --git a/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.reference b/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.reference
new file mode 100644
index 00000000000..ca481c7fff0
--- /dev/null
+++ b/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.reference
@@ -0,0 +1,2 @@
+26790 1488
+26790 1488
diff --git a/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2 b/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2
new file mode 100644
index 00000000000..49ba70c471e
--- /dev/null
+++ b/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2
@@ -0,0 +1,39 @@
+-- Tags: long
+
+SET allow_experimental_analyzer=1;
+
+SET session_timezone = 'UTC';
+
+{% for join_algorithm in ['default', 'full_sorting_merge'] -%}
+
+SET join_algorithm = '{{ join_algorithm }}';
+
+-- TODO: enable once USING and `join_use_nulls` are supported by `full_sorting_merge`
+-- SET join_use_nulls = 1;
+
+WITH build AS (
+    SELECT
+        tk.number AS k,
+        toDateTime('2021-01-01 00:00:00') + INTERVAL i.number SECONDS AS t,
+        i.number % 37 AS v
+    FROM numbers(3000000) AS i
+    CROSS JOIN numbers(2) AS tk
+    SETTINGS join_algorithm = 'hash', join_use_nulls = 0
+),
+probe AS (
+    SELECT
+        tk.number AS k,
+        toDateTime('2021-01-01 00:00:30') + INTERVAL tt.number HOUR AS t
+    FROM numbers(2) AS tk
+    CROSS JOIN numbers(toUInt32((toDateTime('2021-02-01 00:00:30') - toDateTime('2021-01-01 00:00:30')) / 3600)) AS tt
+    SETTINGS join_algorithm = 'hash', join_use_nulls = 0
+)
+SELECT
+    SUM(v) AS v,
+    COUNT(*) AS n
+FROM probe
+ASOF LEFT JOIN build
+USING (k, t)
+;
+
+{% endfor -%}
diff --git a/tests/queries/0_stateless/03147_asof_join_ddb_missing.reference b/tests/queries/0_stateless/03147_asof_join_ddb_missing.reference
new file mode 100644
index 00000000000..11eb84463f4
--- /dev/null
+++ b/tests/queries/0_stateless/03147_asof_join_ddb_missing.reference
@@ -0,0 +1,10 @@
+108
+108 27
+513
+1218
+3528
+14553
+121275
+1495503
+12462525
+1249625025
diff --git a/tests/queries/0_stateless/03147_asof_join_ddb_missing.sql b/tests/queries/0_stateless/03147_asof_join_ddb_missing.sql
new file mode 100644
index 00000000000..95a5f8ab3ff
--- /dev/null
+++ b/tests/queries/0_stateless/03147_asof_join_ddb_missing.sql
@@ -0,0 +1,186 @@
+SET allow_experimental_analyzer=1;
+
+SET session_timezone = 'UTC';
+SET joined_subquery_requires_alias = 0;
+SET allow_experimental_analyzer = 1;
+SET join_algorithm = 'full_sorting_merge';
+
+-- # 10 dates, 5 keys
+WITH build AS (
+    SELECT
+        k,
+        toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t,
+        number AS v
+    FROM numbers(10), (SELECT number AS k FROM numbers(5))
+    SETTINGS join_algorithm = 'default'
+),
+probe AS (
+    SELECT
+        k * 2 AS k,
+        t - INTERVAL 30 SECOND AS t
+    FROM build
+)
+SELECT SUM(v)
+FROM probe ASOF JOIN build USING (k, t);
+
+-- # Coverage: Missing right side bin
+WITH build AS (
+    SELECT
+        k * 2 AS k,
+        toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t,
+        number AS v
+    FROM numbers(10), (SELECT number AS k FROM numbers(5))
+    SETTINGS join_algorithm = 'default'
+),
+probe AS (
+    SELECT
+        intDiv(k, 2) AS k,
+        t - INTERVAL 30 SECOND AS t
+    FROM build
+)
+SELECT SUM(v), COUNT(*)
+FROM probe ASOF JOIN build USING (k, t);
+
+-- # 20 dates, 5 keys
+WITH build AS (
+    SELECT
+        k,
+        toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t,
+        number AS v
+    FROM numbers(20), (SELECT number AS k FROM numbers(5))
+    SETTINGS join_algorithm = 'default'
+),
+probe AS (
+    SELECT
+        k * 2 AS k,
+        t - INTERVAL 30 SECOND AS t
+    FROM build
+)
+SELECT SUM(v)
+FROM probe ASOF JOIN build USING (k, t);
+
+-- # 30 dates, 5 keys
+WITH build AS (
+    SELECT
+        k,
+        toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t,
+        number AS v
+    FROM numbers(30), (SELECT number AS k FROM numbers(5))
+    SETTINGS join_algorithm = 'default'
+),
+probe AS (
+    SELECT
+        k * 2 AS k,
+        t - INTERVAL 30 SECOND AS t
+    FROM build
+)
+SELECT SUM(v)
+FROM probe ASOF JOIN build USING (k, t);
+
+-- # 50 dates, 5 keys
+WITH build AS (
+    SELECT
+        k,
+        toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t,
+        number AS v
+    FROM numbers(50), (SELECT number AS k FROM numbers(5))
+    SETTINGS join_algorithm = 'default'
+),
+probe AS (
+    SELECT
+        k * 2 AS k,
+        t - INTERVAL 30 SECOND AS t
+    FROM build
+)
+SELECT SUM(v)
+FROM probe ASOF JOIN build USING (k, t);
+
+-- # 100 dates, 5 keys
+WITH build AS (
+    SELECT
+        k,
+        toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t,
+        number AS v
+    FROM numbers(100), (SELECT number AS k FROM numbers(5))
+    SETTINGS join_algorithm = 'default'
+),
+probe AS (
+    SELECT
+        k * 2 AS k,
+        t - INTERVAL 30 SECOND AS t
+    FROM build
+)
+SELECT SUM(v)
+FROM probe ASOF JOIN build USING (k, t);
+
+-- # 100 dates, 50 keys
+WITH build AS (
+    SELECT
+        k,
+        toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t,
+        number AS v
+    FROM numbers(100), (SELECT number AS k FROM numbers(50))
+    SETTINGS join_algorithm = 'default'
+),
+probe AS (
+    SELECT
+        k * 2 AS k,
+        t - INTERVAL 30 SECOND AS t
+    FROM build
+)
+SELECT SUM(v)
+FROM probe ASOF JOIN build USING (k, t);
+
+-- # 1000 dates, 5 keys
+WITH build AS (
+    SELECT
+        k,
+        toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t,
+        number AS v
+    FROM numbers(1000), (SELECT number AS k FROM numbers(5))
+    SETTINGS join_algorithm = 'default'
+),
+probe AS (
+    SELECT
+        k * 2 AS k,
+        t - INTERVAL 30 SECOND AS t
+    FROM build
+)
+SELECT SUM(v)
+FROM probe ASOF JOIN build USING (k, t);
+
+-- # 1000 dates, 50 keys
+WITH build AS (
+    SELECT
+        k,
+        toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t,
+        number AS v
+    FROM numbers(1000), (SELECT number AS k FROM numbers(50))
+    SETTINGS join_algorithm = 'default'
+),
+probe AS (
+    SELECT
+        k * 2 AS k,
+        t - INTERVAL 30 SECOND AS t
+    FROM build
+)
+SELECT SUM(v)
+FROM probe ASOF JOIN build USING (k, t);
+
+-- # 10000 dates, 50 keys
+WITH build AS (
+    SELECT
+        k,
+        toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t,
+        number AS v
+    FROM numbers(10000), (SELECT number AS k FROM numbers(50))
+    SETTINGS join_algorithm = 'default'
+),
+probe AS (
+    SELECT
+        k * 2 AS k,
+        t - INTERVAL 30 SECOND AS t
+    FROM build
+)
+SELECT SUM(v)
+FROM probe ASOF JOIN build USING (k, t);
diff --git a/tests/queries/0_stateless/03148_asof_join_ddb_subquery.reference b/tests/queries/0_stateless/03148_asof_join_ddb_subquery.reference
new file mode 100644
index 00000000000..387a4a8f249
--- /dev/null
+++ b/tests/queries/0_stateless/03148_asof_join_ddb_subquery.reference
@@ -0,0 +1,4 @@
+1 1
+3 1
+6 1
+8 1
diff --git a/tests/queries/0_stateless/03148_asof_join_ddb_subquery.sql b/tests/queries/0_stateless/03148_asof_join_ddb_subquery.sql
new file mode 100644
index 00000000000..2ddf0f09b1e
--- /dev/null
+++ b/tests/queries/0_stateless/03148_asof_join_ddb_subquery.sql
@@ -0,0 +1,29 @@
+DROP TABLE IF EXISTS events;
+CREATE TABLE events (begin Float64, value Int32) ENGINE = MergeTree() ORDER BY begin;
+
+INSERT INTO events VALUES (1, 0), (3, 1), (6, 2), (8, 3);
+
+SET allow_experimental_analyzer = 1;
+SET join_algorithm = 'full_sorting_merge';
+SET joined_subquery_requires_alias = 0;
+
+SELECT
+    begin,
+    value IN (
+        SELECT e1.value
+        FROM (
+            SELECT *
+            FROM events e1
+            WHERE e1.value = events.value
+        ) AS e1
+        ASOF JOIN (
+            SELECT number :: Float64 AS begin
+            FROM numbers(10)
+            WHERE number >= 1 AND number < 10
+        )
+        USING (begin)
+    )
+FROM events
+ORDER BY begin ASC;
+
+DROP TABLE IF EXISTS events;
diff --git a/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.reference b/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.reference
new file mode 100644
index 00000000000..7cfc85d23a5
--- /dev/null
+++ b/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.reference
@@ -0,0 +1,56 @@
+2023-03-21 13:00:00 0
+2023-03-21 14:00:00 1
+2023-03-21 15:00:00 2
+2023-03-21 16:00:00 3
+2023-03-21 17:00:00 3
+2023-03-21 18:00:00 3
+2023-03-21 19:00:00 3
+2023-03-21 20:00:00 3
+2023-03-21 21:00:00 3
+2106-02-07 06:28:15 9
+2023-03-21 13:00:00 0
+2023-03-21 14:00:00 1
+2023-03-21 15:00:00 2
+2023-03-21 16:00:00 3
+2023-03-21 17:00:00 3
+2023-03-21 18:00:00 3
+2023-03-21 19:00:00 3
+2023-03-21 20:00:00 3
+2023-03-21 21:00:00 3
+2106-02-07 06:28:15 9
+2023-03-21 12:00:00 \N
+2023-03-21 13:00:00 0
+2023-03-21 14:00:00 1
+2023-03-21 15:00:00 2
+2023-03-21 16:00:00 3
+2023-03-21 17:00:00 3
+2023-03-21 18:00:00 3
+2023-03-21 19:00:00 3
+2023-03-21 20:00:00 3
+2023-03-21 21:00:00 3
+2106-02-07 06:28:15 9
+\N \N
+2023-03-21 12:00:00 0
+2023-03-21 13:00:00 0
+2023-03-21 14:00:00 1
+2023-03-21 15:00:00 2
+2023-03-21 16:00:00 3
+2023-03-21 17:00:00 3
+2023-03-21 18:00:00 3
+2023-03-21 19:00:00 3
+2023-03-21 20:00:00 3
+2023-03-21 21:00:00 3
+2106-02-07 06:28:15 9
+\N 0
+2023-03-21 12:00:00 \N
+2023-03-21 13:00:00 \N
+2023-03-21 14:00:00 \N
+2023-03-21 15:00:00 \N
+2023-03-21 16:00:00 \N
+2023-03-21 17:00:00 \N
+2023-03-21 18:00:00 \N
+2023-03-21 19:00:00 \N
+2023-03-21 20:00:00 \N
+2023-03-21 21:00:00 \N
+2106-02-07 06:28:15 \N
+\N \N
diff --git a/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.sql b/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.sql
new file mode 100644
index 00000000000..cd83d62dc70
--- /dev/null
+++ b/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.sql
@@ -0,0 +1,95 @@
+DROP TABLE IF EXISTS events0;
+DROP TABLE IF EXISTS probe0;
+
+SET session_timezone = 'UTC';
+SET allow_experimental_analyzer = 1;
+SET join_algorithm = 'full_sorting_merge';
+SET join_use_nulls = 1;
+
+CREATE TABLE events0
+ENGINE = MergeTree()
+ORDER BY COALESCE(begin, toDateTime('9999-12-31 23:59:59'))
+AS
+SELECT
+    toNullable(toDateTime('2023-03-21 13:00:00') + INTERVAL number HOUR) AS begin,
+    number AS value
+FROM numbers(4);
+
+INSERT INTO events0 VALUES (NULL, -1), (toDateTime('9999-12-31 23:59:59'), 9);
+
+CREATE TABLE probe0
+ENGINE = MergeTree()
+ORDER BY COALESCE(begin, toDateTime('9999-12-31 23:59:59'))
+AS
+SELECT
+    toNullable(toDateTime('2023-03-21 12:00:00') + INTERVAL number HOUR) AS begin
+FROM numbers(10);
+
+INSERT INTO probe0 VALUES (NULL), (toDateTime('9999-12-31 23:59:59'));
+
+SELECT
+    p.begin,
+    e.value
+FROM
+    probe0 p
+    ASOF JOIN events0 e ON p.begin >= e.begin
+ORDER BY p.begin ASC;
+
+SELECT
+    p.begin,
+    e.value
+FROM
+    probe0 p
+    ASOF JOIN events0 e USING (begin)
+ORDER BY p.begin ASC
+SETTINGS join_use_nulls = 0
+;
+
+SELECT
+    p.begin,
+    e.value
+FROM
+    probe0 p
+    ASOF LEFT JOIN events0 e ON p.begin >= e.begin
+ORDER BY p.begin ASC;
+
+SELECT
+    p.begin,
+    e.value
+FROM
+    probe0 p
+    ASOF LEFT JOIN events0 e USING (begin)
+ORDER BY p.begin ASC
+SETTINGS join_use_nulls = 0
+;
+
+SELECT
+    p.begin,
+    e.value
+FROM
+    probe0 p
+    ASOF RIGHT JOIN events0 e ON p.begin >= e.begin
+ORDER BY e.begin ASC; -- { serverError NOT_IMPLEMENTED}
+
+SELECT
+    p.begin,
+    e.value
+FROM
+    probe0 p
+    ASOF RIGHT JOIN events0 e USING (begin)
+ORDER BY e.begin ASC; -- { serverError NOT_IMPLEMENTED}
+
+
+SELECT
+    p.begin,
+    e.value
+FROM
+    probe0 p
+    ASOF LEFT JOIN (
+        SELECT * FROM events0 WHERE log(value + 5) > 10
+    ) e ON p.begin + INTERVAL 2 HOUR >= e.begin + INTERVAL 1 HOUR
+ORDER BY p.begin ASC;
+
+
+DROP TABLE IF EXISTS events0;
+DROP TABLE IF EXISTS probe0;
diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index 862f38976ce..8e4e4fafe29 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -2541,6 +2541,7 @@ sqlite
 sqrt
 src
 srcReplicas
+sshkey
 stackoverflow
 stacktrace
 stacktraces
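A recurring pattern in the test changes above is to run one and the same ASOF JOIN twice, once under the default (hash) algorithm and once with join_algorithm = 'full_sorting_merge', expecting identical output. A minimal, self-contained sketch of that pattern (the quotes table, its columns, and its data are hypothetical, not taken from any test in this patch):

DROP TABLE IF EXISTS quotes;
CREATE TABLE quotes (k UInt32, t UInt32, v UInt64) ENGINE = MergeTree ORDER BY (k, t);
INSERT INTO quotes VALUES (1, 10, 100), (1, 20, 200);

-- Hash join (the default): for the probe row with t = 15, the ASOF condition
-- t >= quotes.t picks the closest earlier row, t = 10, so v = 100.
SELECT k, t, v FROM (SELECT toUInt32(1) AS k, toUInt32(15) AS t) AS probe
ASOF LEFT JOIN quotes USING (k, t);

-- The same query forced onto the sort-merge path; the result must not change.
SELECT k, t, v FROM (SELECT toUInt32(1) AS k, toUInt32(15) AS t) AS probe
ASOF LEFT JOIN quotes USING (k, t)
SETTINGS join_algorithm = 'full_sorting_merge';

DROP TABLE quotes;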